Rscience - Parameterized Report

Standard analysis, reporting and R for Science. Imagen lateral
- Reproducibility.
- Transparency.
- Open Access.

Descripción de la imagen

Standard analysis, reporting and R code for Science.
- Reproducibility.
- Transparency.
- Open Access.

Work environment

  • Operating System: Ubuntu 22.04.5 LTS
  • Platform info: x86_64-pc-linux-gnu
  • R Info: R version 4.4.3 (2025-02-28) – “Trophy Case”
  • Rscience Version: XXX— agricolae 1.3.7
  • UTC Time: 2025-10-24 16:22:56 UTC
  • System/Computer Time: 2025-10-24 18:22:56 CEST
  • Inferred Timezone Location: Europe/Rome
    (R can only reliably determine the timezone, not the physical city/country.)
  • Rscience Tool: General Linnear Model, Fixed Effect, anova 1 way
  • Rscience Tool Script: Script XXX-001
  • Dataset file name: mtcars
  • Response Variable: mpg
  • Factor: cyl
  • Alpha value: 0.05

Cita

Rscience is currently in a preliminary phase. There is no formal citation for Rscience.

R Code

# Libraries
  library("htmlwidgets")
  library("knitr")

# Initials
file_name   <- params$"file_name"
file_source <- params$"file_source"
var_name_rv <- params$"var_name_rv"
var_name_factor <- params$"var_name_factor"
alpha_value <- as.numeric(as.character(params$"alpha_value"))
vector_ordered_levels <- params$"vector_ordered_levels"
vector_ordered_colors <- params$"vector_ordered_colors"

#vector_ordered_colors <- params$"vector_ordered_colors"

# Basics lvl 01
check_r_source        <- file_source == "r_source"
check_rscience_source <- file_source == "rscience_source"
check_excel_source    <- file_source == "excel_source"
check_csv_source      <- file_source == "csv_source"

if(check_r_source) my_dataset <- get(file_name)
### INIT CODE ###
# # # # # Section 01 - Libraries ---------------------------------------------
  library("stats")     # General Linear Models
  library("agricolae") # Tukey test
  library("plotly")    # Advanced graphical functions
  library("dplyr")     # Developing with %>%
# # # # # Section 02 - Import dataset ----------------------------------------
  #+++---my_dataset <- _+A+_my_import_sentence_+A+_
  head(x = my_dataset, n = 5)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
# # # # # Section 03 - Settings ----------------------------------------------
  #+++---var_name_rv     <- "_+B+_var_name_rv_+B+_"
  #+++---var_name_factor <- "_+B+_var_name_factor_+B+_"
  #+++---alpha_value     <- _+B+_alpha_value_+B+_
  #+++---vector_ordered_levels <- _+C+_vector_ordered_levels_+C+_
  #+++---vector_ordered_colors <- _+C+_vector_ordered_colors_+C+_


# # # # # Section 04 - Standard actions --------------------------------------
  # The factor must be factor data type on R.
  my_dataset[,var_name_factor] <- as.factor(as.character(my_dataset[,var_name_factor]))
  my_dataset[,var_name_factor] <- factor(my_dataset[,var_name_factor], levels = vector_ordered_levels)

# # # # # Section 05 - Alpha and confidence value ----------------------------
  confidence_value <- 1 - alpha_value
  
  df_alpha_confidence <- data.frame(
    "order" = 1:2,
    "detail" = c("alpha value", "confidence value"),
    "probability" = c(alpha_value, confidence_value),
    "percentaje" =  paste0(c(alpha_value, confidence_value)*100, "%")
  )
  df_alpha_confidence
  order           detail probability percentaje
1     1      alpha value        0.05         5%
2     2 confidence value        0.95        95%
# # # # # Section 06 - Selected variables and roles  -------------------------
  vector_all_var_names <- colnames(my_dataset)
  vector_name_selected_vars <- c(var_name_rv, var_name_factor)
  vector_rol_vars <- c("RV", "FACTOR")
  

  df_selected_vars <- data.frame(
    "order" = 1:length(vector_name_selected_vars),
    "var_name" = vector_name_selected_vars,
    "var_number" = match(vector_name_selected_vars, vector_all_var_names),
    "var_letter" = openxlsx::int2col(match(vector_name_selected_vars, vector_all_var_names)),
    "var_role" = vector_rol_vars,
    "doble_reference" = paste0(vector_rol_vars, "(", vector_name_selected_vars, ")")
  )
  df_selected_vars
  order var_name var_number var_letter var_role doble_reference
1     1      mpg          1          A       RV         RV(mpg)
2     2      cyl          2          B   FACTOR     FACTOR(cyl)
  # # # # # Section 05 - minibase ------------------------------------------------
  # Only selected variabless. 
  # Only completed rows. 
  # Factor columns as factor object in R.
  minibase <- na.omit(my_dataset[vector_name_selected_vars])
  #colnames(minibase) <- vector_rol_vars
  minibase[,var_name_factor] <- as.factor(minibase[,var_name_factor])
  head(x = minibase, n = 5)
                   mpg cyl
Mazda RX4         21.0   6
Mazda RX4 Wag     21.0   6
Datsun 710        22.8   4
Hornet 4 Drive    21.4   6
Hornet Sportabout 18.7   8
  # # # Anova control
  # 'VR' must be numeric and 'FACTOR must be factor.
  df_control_minibase <- data.frame(
    "order" = 1:nrow(df_selected_vars),
    "var_name" = df_selected_vars$"var_name",
    "var_role" = df_selected_vars$"var_role",
    "control" = c("is.numeric()", "is.factor()"),
    "verify" = c(is.numeric(minibase[,var_name_rv]), is.factor(minibase[,var_name_factor]))
  )
  df_control_minibase
  order var_name var_role      control verify
1     1      mpg       RV is.numeric()   TRUE
2     2      cyl   FACTOR  is.factor()   TRUE
  # # # my_dataset and minibase reps
  # Our 'n' is from minibase
  df_show_n <- data.frame(
    "object" = c("my_dataset", "minibase"),
    "n_col" = c(ncol(my_dataset), ncol(minibase)),
    "n_row" = c(nrow(my_dataset), nrow(minibase))
  )
  df_show_n
      object n_col n_row
1 my_dataset    11    32
2   minibase     2    32
  # # # Factor info
  # Default order for levels its alphabetic order.
  df_factor_info <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "n" = as.vector(table(minibase[,2])),
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "color" = vector_ordered_colors
  )
  rownames(df_factor_info) <- 1:nrow(df_factor_info)
  df_factor_info
  order level  n     mean   color
1     1     6  7 19.74286 #FF0000
2     2     4 11 26.66364 #00FF00
3     3     8 14 15.10000 #0000FF
  # # # Unbalanced reps for levels?
  # Important information for Tukey.
  # If reps its equal or not equal in all levels must be detailled
  # on Tukey.
  check_unbalanced_reps <- length(unique(df_factor_info$n)) > 1
  check_unbalanced_reps
[1] TRUE
  phrase_yes_unbalanced <- "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
  phrase_no_unbalanced  <- "The design is unbalanced in replicates. A correction should be applied to the Tukey test."
  phrase_selected_check_unbalanced <- ifelse(test = check_unbalanced_reps, 
                                  yes = phrase_yes_unbalanced,
                                  no  = phrase_no_unbalanced)
  
  phrase_selected_check_unbalanced
[1] "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
  # # # # # Section 06 - Anova Test ----------------------------------------------
  # # # Anova test
  the_formula <- paste0(var_name_rv,  " ~ " , var_name_factor)
  the_formula <- as.formula (the_formula)
  lm_anova <- lm(formula = the_formula, data = minibase)               # Linear model
  aov_anova <- aov(lm_anova)                                 # R results for anova
  df_table_anova <- as.data.frame(summary(aov_anova)[[1]])   # Common anova table
  df_table_anova
            Df   Sum Sq   Mean Sq  F value       Pr(>F)
cyl          2 824.7846 412.39230 39.69752 4.978919e-09
Residuals   29 301.2626  10.38837       NA           NA
  # # # Standard error from model for each level
  model_error_var_MSE <- df_table_anova$`Mean Sq`[2]
  model_error_sd <- sqrt(model_error_var_MSE)
  
  df_model_error <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "model_error_var_MSE" = model_error_var_MSE,
    "model_error_sd" = model_error_sd
  )
  df_model_error["model_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$n)
  df_model_error
  order level  n model_error_var_MSE model_error_sd model_error_se
1     1     6  7            10.38837       3.223099      1.2182168
2     2     4 11            10.38837       3.223099      0.9718008
3     3     8 14            10.38837       3.223099      0.8614094
  # # # # # Section 07 - minibase_mod --------------------------------------------
  # # # Detect rows on my_dataset there are on minibase
  dt_rows_my_dataset_ok <- rowSums(!is.na(my_dataset[vector_name_selected_vars])) == ncol(minibase)
  
  
  
  # # # Object minibase_mod and new cols
  minibase_mod <- minibase
  minibase_mod$"lvl_order_number" <- as.numeric(minibase_mod[,2])
  minibase_mod$"lvl_color" <- df_factor_info$color[minibase_mod$"lvl_order_number"]
  minibase_mod$"fitted.values" <- df_factor_info$"mean"[minibase_mod$"lvl_order_number"]
  minibase_mod$"residuals" <- lm_anova$residuals
  minibase_mod$"id_my_dataset" <- c(1:nrow(my_dataset))[dt_rows_my_dataset_ok]
  minibase_mod$"id_minibase" <- 1:nrow(minibase)
  minibase_mod$"studres" <- minibase_mod$"residuals"/model_error_sd
  
  
  
  
  
  # # # # # Section 08 - Requeriments for residuals-------------------------------
  # # # Normality test (Shapiro-Wilk)
  test_residuals_normality <- shapiro.test(minibase_mod$residuals)
  test_residuals_normality

    Shapiro-Wilk normality test

data:  minibase_mod$residuals
W = 0.97065, p-value = 0.5177
  # # # Homogeinidy test (Bartlett)
  the_formula_bartlett <- paste0("residuals", " ~ ", var_name_factor)
  the_formula_bartlett <- as.formula(the_formula_bartlett)
  test_residuals_homogeneity <- bartlett.test(the_formula_bartlett, data = minibase_mod)
  test_residuals_homogeneity

    Bartlett test of homogeneity of variances

data:  residuals by cyl
Bartlett's K-squared = 8.3934, df = 2, p-value = 0.01505
  # # # Residuals variance from levels from original residuals
  df_raw_error <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
    "raw_error_var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "raw_error_sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd)
  )
  df_model_error["raw_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$"n")
  rownames(df_raw_error) <- 1:nrow(df_raw_error)
  df_raw_error
  order level  n raw_error_var raw_error_sd
1     1     6  7      2.112857     1.453567
2     2     4 11     20.338545     4.509828
3     3     8 14      6.553846     2.560048
  phrase_info_errors <- c("Anova and Tukey use MSE from model.", 
                          "Bartlett use variance from raw error on each level.",
                          "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE.")
  phrase_info_errors
[1] "Anova and Tukey use MSE from model."                                                              
[2] "Bartlett use variance from raw error on each level."                                              
[3] "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE."
  # # # Sum for residuals
  sum_residuals <- sum(minibase_mod$residuals)
  sum_residuals
[1] -4.191092e-15
  # # # Mean for residuals
  mean_residuals <- mean(minibase_mod$residuals)
  mean_residuals
[1] -1.309716e-16
  ##################################
  p_value_shapiro  <- test_residuals_normality$p.value
  p_value_bartlett <- test_residuals_homogeneity$p.value
  p_value_anova    <- df_table_anova$"Pr(>F)"[1]
  
  vector_p_value <- c(p_value_shapiro, p_value_bartlett, p_value_anova)
  vector_logic_rejected <- vector_p_value < alpha_value
  vector_ho_decision <- ifelse(test = vector_logic_rejected, yes = "Ho Rejected", "Ho no rejected")
  vector_ho_rejected <- ifelse(test = vector_logic_rejected, yes = "Yes", "No")
  
  df_summary_anova <- data.frame(
    "test" = c("Shapiro-Wilk test", "Bartlett test", "Anova 1 way"),
    "aim"  = c("Normality", "Homogeneity", "Mean"),
    "variable"    = c("residuals", "residuals", var_name_rv),
    "p_value"     = vector_p_value,
    "alpha_value" = c(alpha_value, alpha_value, alpha_value),
    "Decision"    = vector_ho_decision
  )
  
  df_summary_anova
               test         aim  variable      p_value alpha_value
1 Shapiro-Wilk test   Normality residuals 5.176650e-01        0.05
2     Bartlett test Homogeneity residuals 1.504518e-02        0.05
3       Anova 1 way        Mean       mpg 4.978919e-09        0.05
        Decision
1 Ho no rejected
2    Ho Rejected
3    Ho Rejected
  check_shapiro_rejected      <- p_value_shapiro < alpha_value
  phrase_shapiro_yes_rejected <- "The null hypothesis of normal distribution of residuals is rejected."
  phrase_shapiro_no_rejected  <- "The null hypothesis of normal distribution of residuals is not rejected."
  phrase_shapiro_selected     <- ifelse(test = check_shapiro_rejected, 
                                        yes = phrase_shapiro_yes_rejected, 
                                        no = phrase_shapiro_no_rejected)
  phrase_shapiro_selected 
[1] "The null hypothesis of normal distribution of residuals is not rejected."
  check_bartlett_rejected      <- p_value_bartlett < alpha_value
  phrase_bartlett_yes_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
  phrase_bartlett_no_rejected  <- "The hypothesis of homogeneity of variances (homoscedasticity) is not rejected."
  phrase_bartlett_selected     <- ifelse(test = check_bartlett_rejected, 
                                         yes = phrase_bartlett_yes_rejected, 
                                         no = phrase_bartlett_no_rejected)
  phrase_bartlett_selected
[1] "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
  check_ok_all_requeriments     <- sum(vector_logic_rejected[c(1,2)]) == 2
  phrase_requeriments_yes_valid <- "All residual assumptions are met, so it is valid to draw conclusions from the ANOVA test."
  phrase_requeriments_no_valid  <- "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
  phrase_requeriments_selected  <- ifelse(test = check_ok_all_requeriments, 
                                          yes = phrase_requeriments_yes_valid, 
                                          no = phrase_requeriments_no_valid)
  
  phrase_requeriments_selected  
[1] "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
  check_anova_rejected      <- p_value_anova < alpha_value
  phrase_anova_yes_rejected <- "The null hypothesis of the ANOVA test is rejected. There are statistically significant differences in at least one level of the factor."
  phrase_anova_no_rejected  <- "The null hypothesis of the ANOVA test is not rejected. All levels of the factor are statistically equal."
  phrase_anova_selected     <- ifelse(test = check_anova_rejected, 
                                      yes = phrase_anova_yes_rejected, 
                                      no = phrase_anova_no_rejected)
  
  phrase_anova_selected <- ifelse(
    test = check_ok_all_requeriments,
    yes = phrase_anova_selected,
    no = "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
  )
  
  phrase_anova_selected
[1] "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
  ##############################################################################
  tukey01_full_groups <- agricolae::HSD.test(y = lm_anova,
                                             trt = colnames(minibase_mod)[2],
                                             alpha = alpha_value,
                                             group = TRUE,
                                             console = FALSE,
                                             unbalanced = check_unbalanced_reps)
  
  
  
  # # # Tukey test - Tukey pairs comparation - Full version
  tukey02_full_pairs <- agricolae::HSD.test(y = lm_anova,
                                            trt = colnames(minibase_mod)[2],
                                            alpha = alpha_value,
                                            group = FALSE,
                                            console = FALSE,
                                            unbalanced = check_unbalanced_reps)
  
  
  
  # # Original table from R about Tukey
  df_tukey_original_table <- tukey01_full_groups$groups
  df_tukey_original_table
       mpg groups
4 26.66364      a
6 19.74286      b
8 15.10000      c
  # # # New table about Tukey
  df_tukey_table <- data.frame(
    "level" = rownames(tukey01_full_groups$groups),
    "mean" = tukey01_full_groups$groups[,1],
    "group" = tukey01_full_groups$groups[,2]
  )
  df_tukey_table
  level     mean group
1     4 26.66364     a
2     6 19.74286     b
3     8 15.10000     c
  # # # # # Section 10 - Partitioned Measures (VR)--------------------------------
  # # # Partitioned Measures of Position (VR)
  df_vr_position_levels <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "min" = tapply(minibase[,1], minibase[,2], min),
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "Q1" = tapply(minibase[,1], minibase[,2], quantile, 0.25),
    "median" = tapply(minibase[,1], minibase[,2], median),
    "Q3" = tapply(minibase[,1], minibase[,2], quantile, 0.75),
    "max" = tapply(minibase[,1], minibase[,2], max),
    "n" = tapply(minibase[,1], minibase[,2], length)
  )
  
  
  
  # # # Partitioned Measures of Dispersion (VR)
  df_vr_dispersion_levels <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "range" = tapply(minibase[,1], minibase[,2], function(x){max(x) - min(x)}),
    "variance" = tapply(minibase[,1], minibase[,2], var),
    "standard_deviation" = tapply(minibase[,1], minibase[,2], sd),
    "standard_error" = tapply(minibase[,1], minibase[,2], function(x){sd(x)/sqrt(length(x))}),
    "n" = tapply(minibase[,1], minibase[,2], length)
  )
  df_vr_dispersion_levels
  order level range  variance standard_deviation standard_error  n
6     1     6   3.6  2.112857           1.453567      0.5493967  7
4     2     4  12.5 20.338545           4.509828      1.3597642 11
8     3     8   8.8  6.553846           2.560048      0.6842016 14
  # # # General Measures of Position (VR)
  df_vr_position_general <- data.frame(
    "min" = min(minibase[,1]),
    "mean" = mean(minibase[,1]),
    "median" = median(minibase[,1]),
    "max" = max(minibase[,1]),
    "n" = length(minibase[,1])
  )
  df_vr_position_general
   min     mean median  max  n
1 10.4 20.09062   19.2 33.9 32
  # # # General Measures of Dispersion (VR)
  df_vr_dispersion_general <- data.frame(
    "range" = max(minibase[,1]) - min(minibase[,1]),
    "variance" = var(minibase[,1]),
    "standard_deviation" = sd(minibase[,1]),
    "standard_error" = sd(minibase[,1])/(sqrt(length(minibase[,1]))),
    "n" = length(minibase[,1])
  )
  df_vr_dispersion_general
  range variance standard_deviation standard_error  n
1  23.5  36.3241           6.026948       1.065424 32
  # # # # # Section 11 - Partitioned Measures (Residuals)-------------------------
  # # # Partitioned Measures of Position (residuals)
  df_residuals_position_levels <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
    "median" = tapply(minibase_mod$residuals, minibase_mod[,2], median),
    "max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
  )
  df_residuals_position_levels
  order level       min          mean      median      max  n
6     1     6 -1.942857  3.410101e-16 -0.04285714 1.657143  7
4     2     4 -5.263636 -1.918446e-16 -0.66363636 7.236364 11
8     3     8 -4.700000 -3.191891e-16  0.10000000 4.100000 14
  # # # Partitioned Measures of Dispersion (residuals)
  df_residual_dispersion_levels <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "range" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){max(x) - min(x)}),
    "variance" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "standard_deviation" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
    "standard_error" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){sd(x)/sqrt(length(x))}),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
  )
  df_residual_dispersion_levels
  order level range  variance standard_deviation standard_error  n
6     1     6   3.6  2.112857           1.453567      0.5493967  7
4     2     4  12.5 20.338545           4.509828      1.3597642 11
8     3     8   8.8  6.553846           2.560048      0.6842016 14
  # # # General Measures of Position (residuals)
  df_residuals_position_general <- data.frame(
    "min" = min(minibase_mod$residuals),
    "mean" = mean(minibase_mod$residuals),
    "median" = median(minibase_mod$residuals),
    "max" = max(minibase_mod$residuals),
    "n" = length(minibase_mod$residuals)
  )
  df_residuals_position_general
        min          mean     median      max  n
1 -5.263636 -1.309716e-16 0.02857143 7.236364 32
  # # # General Measures of Dispersion (residuals)
  df_residuals_dispersion_general <- data.frame(
    "range" = max(minibase_mod$residuals) - min(minibase_mod$residuals),
    "variance" = var(minibase_mod$residuals),
    "standard_deviation" = sd(minibase_mod$residuals),
    "standard_error" = sd(minibase_mod$residuals)/(sqrt(length(minibase_mod$residuals))),
    "n" = length(minibase_mod$residuals)
  )
  df_residuals_dispersion_general
  range variance standard_deviation standard_error  n
1  12.5 9.718148           3.117394      0.5510827 32
  # # # # # Section 12 - Model estimators ----------------------------------------
  # # # Means for each level
  vector_est_mu_i <- df_vr_position_levels$mean
  vector_est_mu_i
[1] 19.74286 26.66364 15.10000
  # # # Mean of means
  est_mu <- mean(vector_est_mu_i)
  vector_est_mu <- rep(est_mu, length(vector_est_mu_i))
  vector_est_mu
[1] 20.50216 20.50216 20.50216
  # # # Tau efects
  vector_est_tau_i <- vector_est_mu_i - vector_est_mu
  vector_est_tau_i
[1] -0.7593074  6.1614719 -5.4021645
  # # # Sum of tau efects
  sum_est_tau_i <- sum(vector_est_tau_i)
  sum_est_tau_i
[1] -5.329071e-15
  # # # Long model information on dataframe
  df_anova1way_model_long <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "est_mu" = vector_est_mu,
    "est_tau_i" = vector_est_tau_i
  )
  df_anova1way_model_long
  order level  n   est_mu  est_tau_i
1     1     6  7 20.50216 -0.7593074
2     2     4 11 20.50216  6.1614719
3     3     8 14 20.50216 -5.4021645
  # # # Short model information on dataframe
  df_anova1way_model_short <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "est_mu_i" = vector_est_mu_i
  )
  df_anova1way_model_short
  order level  n est_mu_i
1     1     6  7 19.74286
2     2     4 11 26.66364
3     3     8 14 15.10000
  # # # # # Section 13 - Special table to plots ----------------------------------
  
  # # # Table for plot001
  df_table_factor_plot001 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "min" = tapply(minibase[,1], minibase[,2], min),
    "max" = tapply(minibase[,1], minibase[,2], max),
    "sd" = tapply(minibase[,1], minibase[,2], sd),
    "var" = tapply(minibase[,1], minibase[,2], var)
  )
  
  df_table_factor_plot002 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "model_error_sd" = df_model_error$model_error_sd
  )
  df_table_factor_plot002["lower_limit"] <- df_table_factor_plot002$mean - df_table_factor_plot002$model_error_sd
  df_table_factor_plot002["upper_limmit"] <- df_table_factor_plot002$mean + df_table_factor_plot002$model_error_sd
  df_table_factor_plot002["color"] <- df_factor_info$color
  df_table_factor_plot002
  order level  n     mean model_error_sd lower_limit upper_limmit   color
6     1     6  7 19.74286       3.223099    16.51976     22.96596 #FF0000
4     2     4 11 26.66364       3.223099    23.44054     29.88674 #00FF00
8     3     8 14 15.10000       3.223099    11.87690     18.32310 #0000FF
  df_table_factor_plot003 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "model_error_se" = df_model_error$model_error_se
  )
  df_table_factor_plot003["lower_limit"] <- df_table_factor_plot003$mean - df_table_factor_plot003$model_error_se
  df_table_factor_plot003["upper_limmit"] <- df_table_factor_plot003$mean + df_table_factor_plot003$model_error_se
  df_table_factor_plot003["color"] <- df_factor_info$color
  df_table_factor_plot003
  order level  n     mean model_error_se lower_limit upper_limmit   color
6     1     6  7 19.74286      1.2182168    18.52464     20.96107 #FF0000
4     2     4 11 26.66364      0.9718008    25.69184     27.63544 #00FF00
8     3     8 14 15.10000      0.8614094    14.23859     15.96141 #0000FF
  # # # Table for plot004
  df_table_factor_plot004 <- df_vr_position_levels
  df_table_factor_plot004["color"] <- df_factor_info$color
  
  # # # Table for plot005
  df_table_factor_plot005 <- df_table_factor_plot004
  
  # # # Table for plot006
  df_table_factor_plot006 <- df_table_factor_plot004
  
  
  df_table_factor_plot007 <- df_table_factor_plot003
  correct_pos_letters <- order(df_tukey_table$level)
  vector_letters <- df_tukey_table$group[correct_pos_letters]
  df_table_factor_plot007["group"] <- vector_letters
  
  # # # Table for plot006
  df_table_residuals_plot001 <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
    "min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
    "max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
    "var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
    "color" = df_factor_info$color
  )
  df_table_residuals_plot001
  order level  n       min          mean      max       var       sd   color
6     1     6  7 -1.942857  3.410101e-16 1.657143  2.112857 1.453567 #FF0000
4     2     4 11 -5.263636 -1.918446e-16 7.236364 20.338545 4.509828 #00FF00
8     3     8 14 -4.700000 -3.191891e-16 4.100000  6.553846 2.560048 #0000FF
  # # # Table for plot006
  df_table_residuals_plot002 <- df_table_residuals_plot001
  
  # # # Table for plot006
  df_table_residuals_plot003 <- df_table_residuals_plot001
  
  # # # Table for plot006
  df_table_residuals_plot004 <- data.frame(
    "variable" = "residuals",
    "n" = length(minibase_mod$residuals),
    "min" = min(minibase_mod$residuals),
    "mean" = mean(minibase_mod$residuals),
    "max" = max(minibase_mod$residuals),
    "var" = var(minibase_mod$residuals),
    "sd" = sd(minibase_mod$residuals),
    "model_error_var_MSE" = model_error_var_MSE,
    "model_error_sd" = model_error_sd
  )
  
  phrase_coment_errors <- "Model Error (MSE) "
  
  # # # Table for plot006
  df_table_residuals_plot005  <- df_table_residuals_plot004
  
  # # # Table for plot006
  df_table_residuals_plot006 <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$studres, minibase_mod[,2], length),
    "min" = tapply(minibase_mod$studres, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$studres, minibase_mod[,2], mean),
    "max" = tapply(minibase_mod$studres, minibase_mod[,2], max),
    "var" = tapply(minibase_mod$studres, minibase_mod[,2], var),
    "sd" = tapply(minibase_mod$studres, minibase_mod[,2], sd),
    "color" = df_factor_info$color
  )
  
  
  # # # Table for plot006
  df_table_residuals_plot007 <- df_table_residuals_plot006
  
  
  df_table_residuals_plot008 <- data.frame(
    "variable" = "studres",
    "n" = length(minibase_mod$studres),
    "min" = min(minibase_mod$studres),
    "mean" = mean(minibase_mod$studres),
    "max" = max(minibase_mod$studres),
    "var" = var(minibase_mod$studres),
    "sd" = sd(minibase_mod$studres)
  )
  
  
  df_table_residuals_plot009 <- df_table_residuals_plot008
  
  df_table_residuals_plot010 <- df_table_residuals_plot008
  
  #############################################################
  # # # Create a new plot...
  plot001_factor <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot001_factor <- plotly::add_trace(p = plot001_factor,
                                      type = "scatter",
                                      mode = "markers",
                                      x = minibase_mod[,var_name_factor],
                                      y = minibase_mod[,var_name_rv],
                                      color = minibase_mod[,var_name_factor],
                                      colors = df_factor_info$color,
                                      marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot001_factor <-   plotly::layout(p = plot001_factor,
                                     title = "Plot 001 - Scatterplot",
                                     font = list(size = 20),
                                     margin = list(t = 100))
  
  
  # # # Without zerolines
  plot001_factor <-   plotly::layout(p = plot001_factor,
                                     xaxis = list(zeroline = FALSE),
                                     yaxis = list(zeroline = FALSE))
  
  
  # # # Plot output
  plot001_factor
  ##############################################################################
  
  # # # Create a new plot...
  plot002_factor <- plot_ly()
  
  
  # # # Adding errors...
  plot002_factor <-   add_trace(p = plot002_factor,
                                type = "scatter",
                                mode = "markers",
                                x = df_table_factor_plot002$level,
                                y = df_table_factor_plot002$mean,
                                color = df_table_factor_plot002$level,
                                colors = df_table_factor_plot002$color,
                                marker = list(symbol = "line-ew-open",
                                              size = 50,
                                              opacity = 1,
                                              line = list(width = 5)),
                                error_y = list(type = "data", array = df_table_factor_plot002$model_error_sd)
  )
  
  
  # # # Title and settings...
  plot002_factor <- plotly::layout(p = plot002_factor,
                                   title = "Plot 002 - Mean and model standard deviation",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot002_factor <-plotly::layout(p = plot002_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  # # # Plot output
  plot002_factor
  ##############################################################################
  
  
  # # # Create a new plot...
  plot003_factor <- plotly::plot_ly()
  
  
  # # # Adding errors...
  plot003_factor <-   plotly::add_trace(p = plot003_factor,
                                        type = "scatter",
                                        mode = "markers",
                                        x = df_table_factor_plot003$level,
                                        y = df_table_factor_plot003$mean,
                                        color = df_table_factor_plot003$level,
                                        colors = df_table_factor_plot003$color,
                                        marker = list(symbol = "line-ew-open",
                                                      size = 50,
                                                      opacity = 1,
                                                      line = list(width = 5)),
                                        error_y = list(type = "data", array = df_table_factor_plot003$model_error_se)
  )
  
  
  # # # Title and settings...
  plot003_factor <- plotly::layout(p = plot003_factor,
                                   title = "Plot 003 - Mean y model standard error",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot003_factor <-plotly::layout(p = plot003_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  # # # Plot output
  plot003_factor
  ##############################################################################
  
  
  # # # New plotly...
  plot004_factor <- plotly::plot_ly()
  
  # # # Boxplot and info...
  plot004_factor <- plotly::add_trace(p = plot004_factor,
                                      type = "box",
                                      x = df_table_factor_plot004$level ,
                                      color = df_table_factor_plot004$level,
                                      colors = df_table_factor_plot004$color,
                                      lowerfence = df_table_factor_plot004$min,
                                      q1 = df_table_factor_plot004$Q1,
                                      median = df_table_factor_plot004$median,
                                      q3 = df_table_factor_plot004$Q3,
                                      upperfence = df_table_factor_plot004$max,
                                      boxmean = TRUE,
                                      boxpoints = FALSE,
                                      line = list(color = "black", width = 3)
  )
  
  # # # Title and settings...
  plot004_factor <- plotly::layout(p = plot004_factor,
                                   title = "Plot 004 - Boxplot and means",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot004_factor <- plotly::layout(p = plot004_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot004_anova...
  plot004_factor
  ##############################################################################
  
  all_levels <- levels(minibase_mod[,2])
  n_levels <- length(all_levels)
  all_color <- rainbow(length(all_levels))
  
  
  
  plot005_factor <- plot_ly()
  
  # Violinplot
  for (k in 1:n_levels){
    
    # Selected values
    selected_level <- all_levels[k]
    selected_color <- all_color[k]
    dt_filas <- minibase_mod[,2] == selected_level
    
    # Plotting selected violinplot
    plot005_factor <- plot005_factor %>%
      add_trace(x = minibase_mod[,2][dt_filas],
                y = minibase_mod[,1][dt_filas],
                type = "violin",
                name = paste0("violin", k),
                points = "all",
                marker = list(color = selected_color),
                line = list(color = selected_color),
                fillcolor = I(selected_color)
                
      )
    
    
  }
  
  
  
  
  # Boxplot
  plot005_factor <- plotly::add_trace(p = plot005_factor,
                                      type = "box",
                                      name = "boxplot",
                                      x = df_table_factor_plot005$level ,
                                      color = df_table_factor_plot005$level ,
                                      lowerfence = df_table_factor_plot005$min,
                                      q1 = df_table_factor_plot005$Q1,
                                      median = df_table_factor_plot005$median,
                                      q3 = df_table_factor_plot005$Q3,
                                      upperfence = df_table_factor_plot005$max,
                                      boxmean = TRUE,
                                      boxpoints = TRUE,
                                      fillcolor = df_table_factor_plot005$color,
                                      line = list(color = "black", width = 3),
                                      opacity = 0.5,
                                      width = 0.2)
  
  
  # # # Title and settings...
  plot005_factor <- plotly::layout(p = plot005_factor,
                                   title = "Plot 005 - Violinplot",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot005_factor <- plotly::layout(p = plot005_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot005_factor
  ##############################################################################
  
  
  #library(plotly)
  plot006_factor <- plotly::plot_ly()
  
  # Add traces
  plot006_factor <- plotly::add_trace(p = plot006_factor,
                                      type = "violin",
                                      y = minibase_mod$VR,
                                      x = minibase_mod$FACTOR,
                                      showlegend = TRUE,
                                      side = "positive",
                                      points = "all",
                                      name = "Violinplot",
                                      color = minibase_mod$FACTOR,
                                      colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot006_factor <- plotly::layout(p = plot006_factor,
                                   title = "Plot 006 - Scatterplot + Jitter +  Smoothed",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot006_factor <- plotly::layout(p = plot006_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot006_factor
  ##############################################################################
  
  # # # Create a new plot...
  plot007_factor <- plotly::plot_ly()
  
  
  # # # Adding errors...
  plot007_factor <-   plotly::add_trace(p = plot007_factor,
                                        type = "scatter",
                                        mode = "markers",
                                        x = df_table_factor_plot007$level,
                                        y = df_table_factor_plot007$mean,
                                        color = df_table_factor_plot007$level,
                                        colors = df_table_factor_plot007$color,
                                        marker = list(symbol = "line-ew-open",
                                                      size = 50,
                                                      opacity = 1,
                                                      line = list(width = 5)),
                                        error_y = list(type = "data", array = df_table_factor_plot007$model_error_se)
  )
  
  
  
  plot007_factor <-  add_text(p = plot007_factor,
                              x = df_table_factor_plot007$level,
                              y = df_table_factor_plot007$mean,
                              text = df_table_factor_plot007$group, name = "Tukey Group",
                              size = 20)
  
  # # # Title and settings...
  plot007_factor <- plotly::layout(p = plot007_factor,
                                   title = "Plot 007 - Mean y model standard error",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot007_factor <-plotly::layout(p = plot007_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  
  # # # Plot output
  plot007_factor
  #######----
  
  ####### DESDE ACAAAAAAAAAAAAAAAAAAAA
  # # # Create a new plot...
  plot001_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot001_residuals <- plotly::add_trace(p = plot001_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$FACTOR,
                                         y = minibase_mod$residuals,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot001_residuals <-   plotly::layout(p = plot001_residuals,
                                        title = "Plot 001 - Scatterplot - Residuals",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot001_residuals <-   plotly::layout(p = plot001_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot001_residuals
  #library(plotly)
  plot002_residuals <- plotly::plot_ly()
  
  # Add traces
  plot002_residuals <- plotly::add_trace(p = plot002_residuals,
                                         type = "violin",
                                         y = minibase_mod$residuals,
                                         x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = "Violinplot",
                                         color = minibase_mod$FACTOR,
                                         colors = df_table_residuals_plot002$color)
  
  
  
  # # # Title and settings...
  plot002_residuals <- plotly::layout(p = plot002_residuals,
                                      title = "Plot 002 - Residuals - Scatterplot + Jitter +  Smoothed",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot002_residuals <- plotly::layout(p = plot002_residuals,
                                      xaxis = list(zeroline = FALSE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot002_residuals
  plot003_residuals <- plotly::plot_ly()
  
  # Add traces
  plot003_residuals <- plotly::add_trace(p = plot003_residuals,
                                         type = "violin",
                                         x = minibase_mod$residuals,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = FALSE,
                                         #name = levels(minibase_mod$FACTOR)[minibase_mod$lvl_order_number],
                                         color = minibase_mod$FACTOR,
                                         colors = df_table_residuals_plot003$color)
  
  
  
  # # # Title and settings...
  plot003_residuals <- plotly::layout(p = plot003_residuals,
                                      title = "Plot 003 - Residuals - Scatterplot + Jitter +  Smoothed",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot003residuals <- plotly::layout(p = plot003_residuals,
                                     xaxis = list(zeroline = FALSE),
                                     yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot003_residuals
  plot004_residuals <- plotly::plot_ly()
  
  # Add traces
  plot004_residuals <- plotly::add_trace(p = plot004_residuals,
                                         type = "violin",
                                         x = minibase_mod$residuals,
                                         #x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = " ")#
  #color = minibase_mod$FACTOR,
  #colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot004_residuals <- plotly::layout(p = plot004_residuals,
                                      title = "Plot 004 - Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot004_residuals <- plotly::layout(p = plot004_residuals,
                                      xaxis = list(zeroline = TRUE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot004_residuals
  # - el 5
  qq_info <- EnvStats::qqPlot(x = minibase_mod$residuals, plot.it = F,
                              param.list = list(mean = mean(minibase_mod$residuals),
                                                sd = sd(minibase_mod$residuals)))
  
  cuantiles_teoricos <- qq_info$x
  cuantiles_observados <- qq_info$y
  
  #library(plotly)
  plot005_residuals <- plotly::plot_ly()
  
  # Crear el gráfico QQ plot
  plot005_residuals <-add_trace(p = plot005_residuals,
                                x = cuantiles_teoricos,
                                y = cuantiles_observados,
                                type = 'scatter', mode = 'markers',
                                marker = list(color = 'blue'),
                                name = "points")
  
  # Agregar la línea de identidad
  pendiente <- 1
  intercepto <- 0
  
  # Calcular las coordenadas de los extremos de la línea de identidad
  x_extremos <- range(cuantiles_teoricos)
  y_extremos <- pendiente * x_extremos + intercepto
  
  # Agregar la recta de identidad
  plot005_residuals <- add_trace(p = plot005_residuals,
                                 x = x_extremos,
                                 y = y_extremos,
                                 type = 'scatter',
                                 mode = 'lines',
                                 line = list(color = 'red'),
                                 name = "identity")
  
  
  # Establecer etiquetas de los ejes
  # plot007_residuals <- layout(p = plot007_residuals,
  #                             xaxis = list(title = 'Expected quantiles'),
  #                             yaxis = list(title = 'Observed quantiles'))
  
  plot005_residuals <- plotly::layout(p = plot005_residuals,
                                      title = "Plot 005 - QQ Plot Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  # Mostrar el gráfico
  plot005_residuals
  # - Fin el 5
  
  
  
  # # # Create a new plot...
  plot006_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot006_residuals <- plotly::add_trace(p = plot006_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$fitted.values,
                                         y = minibase_mod$residuals,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot006_residuals <-   plotly::layout(p = plot006_residuals,
                                        title = "Plot 006 - Scatterplot - Residuals vs Fitted.values",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot006_residuals <-   plotly::layout(p = plot006_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot006_residuals
  # # # Create a new plot...
  plot007_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot007_residuals <- plotly::add_trace(p = plot007_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$FACTOR,
                                         y = minibase_mod$studres,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot007_residuals <-   plotly::layout(p = plot007_residuals,
                                        title = "Plot 007 - Scatterplot - Studentized Residuals",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot007_residuals <-   plotly::layout(p = plot007_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot007_residuals
  #library(plotly)
  plot008_residuals <- plotly::plot_ly()
  
  # Add traces
  plot008_residuals <- plotly::add_trace(p = plot008_residuals,
                                         type = "violin",
                                         x = minibase_mod$studres,
                                         #x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = " ")#
  #color = minibase_mod$FACTOR,
  #colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot008_residuals <- plotly::layout(p = plot008_residuals,
                                      title = "Plot 008 - Studentized Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot008_residuals <- plotly::layout(p = plot008_residuals,
                                      xaxis = list(zeroline = TRUE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot008_residuals
  # el 9
  
  x <- seq(-4, 4, length.out = 100)
  y <- dnorm(x, mean = 0, 1)
  #  x <- x*model_error_sd
  densidad_suavizada <- density(x, kernel = "gaussian", adjust = 0.5)
  hist_data_studres <- hist(minibase_mod$studres, plot = FALSE)
  hist_data_studres$"rel_frec" <- hist_data_studres$counts/sum(hist_data_studres$counts)
  
  densidad_studres <-  density(x = minibase_mod$studres, kernel = "gaussian", adjust =0.5)
  
  #library(plotly)
  plot009_residuals <- plotly::plot_ly()
  
  
  # plot005_residuals <- add_trace(p = plot005_residuals,
  #                                x = densidad_studres$x,
  #                                y = densidad_studres$y,
  #                                type = 'scatter',
  #                                mode = 'lines',
  #                                name = 'densidad_studres')
  
  plot009_residuals <- add_trace(p = plot009_residuals,
                                 x = x,
                                 y = y,
                                 type = 'scatter',
                                 mode = 'lines',
                                 name = 'Normal Standard')
  
  
  
  
  
  # # Add traces
  # plot005_residuals <- plotly::add_trace(p = plot005_residuals,
  #                                        type = "violin",
  #                                        x = minibase_mod$residuals,
  #                                        #x = minibase_mod$FACTOR,
  #                                        showlegend = TRUE,
  #                                        side = "positive",
  #                                        points = FALSE,
  #                                        name = "violinplot")#
  
  plot009_residuals <- plotly::add_trace(p = plot009_residuals,
                                         type = "bar",
                                         x = hist_data_studres$"mids",
                                         y = hist_data_studres$"density",
                                         name = "hist - studres")
  
  plot009_residuals <- plotly::layout(p = plot009_residuals,
                                      bargap = 0)
  
  plot009_residuals <- plotly::layout(p = plot009_residuals,
                                      title = "Plot 009 - Studres Distribution",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  plot009_residuals
  # fin el 9
  
  
  
  
  qq_info <- EnvStats::qqPlot(x = minibase_mod$studres, plot.it = F,
                              param.list = list(mean = 0,
                                                sd = 1))
  
  cuantiles_teoricos <- qq_info$x
  cuantiles_observados <- qq_info$y
  
  #library(plotly)
  plot010_residuals <- plotly::plot_ly()
  
  # Crear el gráfico QQ plot
  plot010_residuals <-add_trace(p = plot010_residuals,
                                x = cuantiles_teoricos,
                                y = cuantiles_observados,
                                type = 'scatter', mode = 'markers',
                                marker = list(color = 'blue'),
                                name = "points")
  
  # Agregar la línea de identidad
  pendiente <- 1
  intercepto <- 0
  
  # Calcular las coordenadas de los extremos de la línea de identidad
  x_extremos <- range(cuantiles_teoricos)
  y_extremos <- pendiente * x_extremos + intercepto
  
  # Agregar la recta de identidad
  plot010_residuals <- add_trace(p = plot010_residuals,
                                 x = x_extremos,
                                 y = y_extremos,
                                 type = 'scatter',
                                 mode = 'lines',
                                 line = list(color = 'red'),
                                 name = "identity")
  
  
  # Establecer etiquetas de los ejes
  # plot007_residuals <- layout(p = plot007_residuals,
  #                             xaxis = list(title = 'Expected quantiles'),
  #                             yaxis = list(title = 'Observed quantiles'))
  
  plot010_residuals <- plotly::layout(p = plot010_residuals,
                                      title = "Plot 010 - QQ Plot - studres",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  # Mostrar el gráfico
  plot010_residuals
---
title: "Rscience - _script_modulo"
format: 
  html:
    theme: 
      - custom_theme.scss
    grid:
      body-width: 2000px
      margin-width: 250px
      gutter-width: 1.5rem
    toc: true
    toc-float: true
    toc-location: right
    self-contained: true
    link-citations: true # Util para ver referencias en el margen (ver `tufte.html` en la galería de Quarto)

knitr: 
    opts_chunk: 
        collapse: false
        comment: ""
params:
    file_name: "mtcars"
    file_source: "r_source" 
    var_name_rv: "mpg"
    var_name_factor: "cyl"
    alpha_value: "0.05"
    vector_ordered_levels:
      - "6"
      - "4"
      - "8"
    vector_ordered_colors: 
      - "#000000"
      - "#00FF00"
      - "#0000FF"
---

# Libraries
  library("htmlwidgets")
  library("knitr")

# Initials
file_name   <- params$"file_name"
file_source <- params$"file_source"
var_name_rv <- params$"var_name_rv"
var_name_factor <- params$"var_name_factor"
alpha_value <- as.numeric(as.character(params$"alpha_value"))
vector_ordered_levels <- params$"vector_ordered_levels"
vector_ordered_colors <- params$"vector_ordered_colors"

#vector_ordered_colors <- params$"vector_ordered_colors"

# Basics lvl 01
check_r_source        <- file_source == "r_source"
check_rscience_source <- file_source == "rscience_source"
check_excel_source    <- file_source == "excel_source"
check_csv_source      <- file_source == "csv_source"

if(check_r_source) my_dataset <- get(file_name)

### INIT CODE ###
# # # # # Section 01 - Libraries ---------------------------------------------
  library("stats")     # General Linear Models
  library("agricolae") # Tukey test
  library("plotly")    # Advanced graphical functions
  library("dplyr")     # Developing with %>%

# # # # # Section 02 - Import dataset ----------------------------------------
  #+++---my_dataset <- _+A+_my_import_sentence_+A+_
  head(x = my_dataset, n = 5)
  
# # # # # Section 03 - Settings ----------------------------------------------
  #+++---var_name_rv     <- "_+B+_var_name_rv_+B+_"
  #+++---var_name_factor <- "_+B+_var_name_factor_+B+_"
  #+++---alpha_value     <- _+B+_alpha_value_+B+_
  #+++---vector_ordered_levels <- _+C+_vector_ordered_levels_+C+_
  #+++---vector_ordered_colors <- _+C+_vector_ordered_colors_+C+_


# # # # # Section 04 - Standard actions --------------------------------------
  # The factor must be factor data type on R.
  my_dataset[,var_name_factor] <- as.factor(as.character(my_dataset[,var_name_factor]))
  my_dataset[,var_name_factor] <- factor(my_dataset[,var_name_factor], levels = vector_ordered_levels)

# # # # # Section 05 - Alpha and confidence value ----------------------------
  confidence_value <- 1 - alpha_value
  
  df_alpha_confidence <- data.frame(
    "order" = 1:2,
    "detail" = c("alpha value", "confidence value"),
    "probability" = c(alpha_value, confidence_value),
    "percentaje" =  paste0(c(alpha_value, confidence_value)*100, "%")
  )
  df_alpha_confidence

# # # # # Section 06 - Selected variables and roles  -------------------------
  vector_all_var_names <- colnames(my_dataset)
  vector_name_selected_vars <- c(var_name_rv, var_name_factor)
  vector_rol_vars <- c("RV", "FACTOR")
  

  df_selected_vars <- data.frame(
    "order" = 1:length(vector_name_selected_vars),
    "var_name" = vector_name_selected_vars,
    "var_number" = match(vector_name_selected_vars, vector_all_var_names),
    "var_letter" = openxlsx::int2col(match(vector_name_selected_vars, vector_all_var_names)),
    "var_role" = vector_rol_vars,
    "doble_reference" = paste0(vector_rol_vars, "(", vector_name_selected_vars, ")")
  )
  df_selected_vars
  
  # # # # # Section 05 - minibase ------------------------------------------------
  # Only selected variabless. 
  # Only completed rows. 
  # Factor columns as factor object in R.
  minibase <- na.omit(my_dataset[vector_name_selected_vars])
  #colnames(minibase) <- vector_rol_vars
  minibase[,var_name_factor] <- as.factor(minibase[,var_name_factor])
  head(x = minibase, n = 5)
  

  
  # # # Anova control
  # 'VR' must be numeric and 'FACTOR must be factor.
  df_control_minibase <- data.frame(
    "order" = 1:nrow(df_selected_vars),
    "var_name" = df_selected_vars$"var_name",
    "var_role" = df_selected_vars$"var_role",
    "control" = c("is.numeric()", "is.factor()"),
    "verify" = c(is.numeric(minibase[,var_name_rv]), is.factor(minibase[,var_name_factor]))
  )
  df_control_minibase
  
  
  
  # # # my_dataset and minibase reps
  # Our 'n' is from minibase
  df_show_n <- data.frame(
    "object" = c("my_dataset", "minibase"),
    "n_col" = c(ncol(my_dataset), ncol(minibase)),
    "n_row" = c(nrow(my_dataset), nrow(minibase))
  )
  df_show_n
  
  
  
  # # # Factor info
  # Default order for levels its alphabetic order.
  df_factor_info <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "n" = as.vector(table(minibase[,2])),
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "color" = vector_ordered_colors
  )
  rownames(df_factor_info) <- 1:nrow(df_factor_info)
  df_factor_info
  
  
  
  # # # Unbalanced reps for levels?
  # Important information for Tukey.
  # If reps its equal or not equal in all levels must be detailled
  # on Tukey.
  check_unbalanced_reps <- length(unique(df_factor_info$n)) > 1
  check_unbalanced_reps
  
  phrase_yes_unbalanced <- "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
  phrase_no_unbalanced  <- "The design is unbalanced in replicates. A correction should be applied to the Tukey test."
  phrase_selected_check_unbalanced <- ifelse(test = check_unbalanced_reps, 
                                  yes = phrase_yes_unbalanced,
                                  no  = phrase_no_unbalanced)
  
  phrase_selected_check_unbalanced
  
  # # # # # Section 06 - Anova Test ----------------------------------------------
  # # # Anova test
  the_formula <- paste0(var_name_rv,  " ~ " , var_name_factor)
  the_formula <- as.formula (the_formula)
  lm_anova <- lm(formula = the_formula, data = minibase)               # Linear model
  aov_anova <- aov(lm_anova)                                 # R results for anova
  df_table_anova <- as.data.frame(summary(aov_anova)[[1]])   # Common anova table
  df_table_anova
  
  
  
  # # # Standard error from model for each level
  model_error_var_MSE <- df_table_anova$`Mean Sq`[2]
  model_error_sd <- sqrt(model_error_var_MSE)
  
  df_model_error <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "model_error_var_MSE" = model_error_var_MSE,
    "model_error_sd" = model_error_sd
  )
  df_model_error["model_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$n)
  df_model_error
  
  
  
  
  
  # # # # # Section 07 - minibase_mod --------------------------------------------
  # # # Detect rows on my_dataset there are on minibase
  dt_rows_my_dataset_ok <- rowSums(!is.na(my_dataset[vector_name_selected_vars])) == ncol(minibase)
  
  
  
  # # # Object minibase_mod and new cols
  minibase_mod <- minibase
  minibase_mod$"lvl_order_number" <- as.numeric(minibase_mod[,2])
  minibase_mod$"lvl_color" <- df_factor_info$color[minibase_mod$"lvl_order_number"]
  minibase_mod$"fitted.values" <- df_factor_info$"mean"[minibase_mod$"lvl_order_number"]
  minibase_mod$"residuals" <- lm_anova$residuals
  minibase_mod$"id_my_dataset" <- c(1:nrow(my_dataset))[dt_rows_my_dataset_ok]
  minibase_mod$"id_minibase" <- 1:nrow(minibase)
  minibase_mod$"studres" <- minibase_mod$"residuals"/model_error_sd
  
  
  
  
  
  # # # # # Section 08 - Requeriments for residuals-------------------------------
  # # # Normality test (Shapiro-Wilk)
  test_residuals_normality <- shapiro.test(minibase_mod$residuals)
  test_residuals_normality
  
  
  
  
  # # # Homogeinidy test (Bartlett)
  the_formula_bartlett <- paste0("residuals", " ~ ", var_name_factor)
  the_formula_bartlett <- as.formula(the_formula_bartlett)
  test_residuals_homogeneity <- bartlett.test(the_formula_bartlett, data = minibase_mod)
  test_residuals_homogeneity
  
  
  
  # # # Residuals variance from levels from original residuals
  df_raw_error <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
    "raw_error_var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "raw_error_sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd)
  )
  df_model_error["raw_error_se"] <- df_model_error["model_error_sd"]/sqrt(df_model_error$"n")
  rownames(df_raw_error) <- 1:nrow(df_raw_error)
  df_raw_error
  
  phrase_info_errors <- c("Anova and Tukey use MSE from model.", 
                          "Bartlett use variance from raw error on each level.",
                          "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE.")
  phrase_info_errors
  # # # Sum for residuals
  sum_residuals <- sum(minibase_mod$residuals)
  sum_residuals
  
  
  
  # # # Mean for residuals
  mean_residuals <- mean(minibase_mod$residuals)
  mean_residuals
  
  
  ##################################
  p_value_shapiro  <- test_residuals_normality$p.value
  p_value_bartlett <- test_residuals_homogeneity$p.value
  p_value_anova    <- df_table_anova$"Pr(>F)"[1]
  
  vector_p_value <- c(p_value_shapiro, p_value_bartlett, p_value_anova)
  vector_logic_rejected <- vector_p_value < alpha_value
  vector_ho_decision <- ifelse(test = vector_logic_rejected, yes = "Ho Rejected", "Ho no rejected")
  vector_ho_rejected <- ifelse(test = vector_logic_rejected, yes = "Yes", "No")
  
  df_summary_anova <- data.frame(
    "test" = c("Shapiro-Wilk test", "Bartlett test", "Anova 1 way"),
    "aim"  = c("Normality", "Homogeneity", "Mean"),
    "variable"    = c("residuals", "residuals", var_name_rv),
    "p_value"     = vector_p_value,
    "alpha_value" = c(alpha_value, alpha_value, alpha_value),
    "Decision"    = vector_ho_decision
  )
  
  df_summary_anova
  
  check_shapiro_rejected      <- p_value_shapiro < alpha_value
  phrase_shapiro_yes_rejected <- "The null hypothesis of normal distribution of residuals is rejected."
  phrase_shapiro_no_rejected  <- "The null hypothesis of normal distribution of residuals is not rejected."
  phrase_shapiro_selected     <- ifelse(test = check_shapiro_rejected, 
                                        yes = phrase_shapiro_yes_rejected, 
                                        no = phrase_shapiro_no_rejected)
  phrase_shapiro_selected 
  
  
  check_bartlett_rejected      <- p_value_bartlett < alpha_value
  phrase_bartlett_yes_rejected <- "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
  phrase_bartlett_no_rejected  <- "The hypothesis of homogeneity of variances (homoscedasticity) is not rejected."
  phrase_bartlett_selected     <- ifelse(test = check_bartlett_rejected, 
                                         yes = phrase_bartlett_yes_rejected, 
                                         no = phrase_bartlett_no_rejected)
  phrase_bartlett_selected
  
  
  check_ok_all_requeriments     <- sum(vector_logic_rejected[c(1,2)]) == 2
  phrase_requeriments_yes_valid <- "All residual assumptions are met, so it is valid to draw conclusions from the ANOVA test."
  phrase_requeriments_no_valid  <- "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
  phrase_requeriments_selected  <- ifelse(test = check_ok_all_requeriments, 
                                          yes = phrase_requeriments_yes_valid, 
                                          no = phrase_requeriments_no_valid)
  
  phrase_requeriments_selected  
  
  
  
  check_anova_rejected      <- p_value_anova < alpha_value
  phrase_anova_yes_rejected <- "The null hypothesis of the ANOVA test is rejected. There are statistically significant differences in at least one level of the factor."
  phrase_anova_no_rejected  <- "The null hypothesis of the ANOVA test is not rejected. All levels of the factor are statistically equal."
  phrase_anova_selected     <- ifelse(test = check_anova_rejected, 
                                      yes = phrase_anova_yes_rejected, 
                                      no = phrase_anova_no_rejected)
  
  phrase_anova_selected <- ifelse(
    test = check_ok_all_requeriments,
    yes = phrase_anova_selected,
    no = "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
  )
  
  phrase_anova_selected
  ##############################################################################
  tukey01_full_groups <- agricolae::HSD.test(y = lm_anova,
                                             trt = colnames(minibase_mod)[2],
                                             alpha = alpha_value,
                                             group = TRUE,
                                             console = FALSE,
                                             unbalanced = check_unbalanced_reps)
  
  
  
  # # # Tukey test - Tukey pairs comparation - Full version
  tukey02_full_pairs <- agricolae::HSD.test(y = lm_anova,
                                            trt = colnames(minibase_mod)[2],
                                            alpha = alpha_value,
                                            group = FALSE,
                                            console = FALSE,
                                            unbalanced = check_unbalanced_reps)
  
  
  
  # # Original table from R about Tukey
  df_tukey_original_table <- tukey01_full_groups$groups
  df_tukey_original_table
  
  
  
  # # # New table about Tukey
  df_tukey_table <- data.frame(
    "level" = rownames(tukey01_full_groups$groups),
    "mean" = tukey01_full_groups$groups[,1],
    "group" = tukey01_full_groups$groups[,2]
  )
  df_tukey_table
  
  
  
  
  
  # # # # # Section 10 - Partitioned Measures (VR)--------------------------------
  # # # Partitioned Measures of Position (VR)
  df_vr_position_levels <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "min" = tapply(minibase[,1], minibase[,2], min),
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "Q1" = tapply(minibase[,1], minibase[,2], quantile, 0.25),
    "median" = tapply(minibase[,1], minibase[,2], median),
    "Q3" = tapply(minibase[,1], minibase[,2], quantile, 0.75),
    "max" = tapply(minibase[,1], minibase[,2], max),
    "n" = tapply(minibase[,1], minibase[,2], length)
  )
  
  
  
  # # # Partitioned Measures of Dispersion (VR)
  df_vr_dispersion_levels <- data.frame(
    "order" = 1:nlevels(minibase[,2]),
    "level" = levels(minibase[,2]),
    "range" = tapply(minibase[,1], minibase[,2], function(x){max(x) - min(x)}),
    "variance" = tapply(minibase[,1], minibase[,2], var),
    "standard_deviation" = tapply(minibase[,1], minibase[,2], sd),
    "standard_error" = tapply(minibase[,1], minibase[,2], function(x){sd(x)/sqrt(length(x))}),
    "n" = tapply(minibase[,1], minibase[,2], length)
  )
  df_vr_dispersion_levels
  
  
  
  # # # General Measures of Position (VR)
  df_vr_position_general <- data.frame(
    "min" = min(minibase[,1]),
    "mean" = mean(minibase[,1]),
    "median" = median(minibase[,1]),
    "max" = max(minibase[,1]),
    "n" = length(minibase[,1])
  )
  df_vr_position_general
  
  
  
  # # # General Measures of Dispersion (VR)
  df_vr_dispersion_general <- data.frame(
    "range" = max(minibase[,1]) - min(minibase[,1]),
    "variance" = var(minibase[,1]),
    "standard_deviation" = sd(minibase[,1]),
    "standard_error" = sd(minibase[,1])/(sqrt(length(minibase[,1]))),
    "n" = length(minibase[,1])
  )
  df_vr_dispersion_general
  
  
  
  
  
  # # # # # Section 11 - Partitioned Measures (Residuals)-------------------------
  # # # Partitioned Measures of Position (residuals)
  df_residuals_position_levels <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
    "median" = tapply(minibase_mod$residuals, minibase_mod[,2], median),
    "max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
  )
  df_residuals_position_levels
  
  
  
  # # # Partitioned Measures of Dispersion (residuals)
  df_residual_dispersion_levels <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "range" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){max(x) - min(x)}),
    "variance" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "standard_deviation" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
    "standard_error" = tapply(minibase_mod$residuals, minibase_mod[,2], function(x){sd(x)/sqrt(length(x))}),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length)
  )
  df_residual_dispersion_levels
  
  
  
  # # # General Measures of Position (residuals)
  df_residuals_position_general <- data.frame(
    "min" = min(minibase_mod$residuals),
    "mean" = mean(minibase_mod$residuals),
    "median" = median(minibase_mod$residuals),
    "max" = max(minibase_mod$residuals),
    "n" = length(minibase_mod$residuals)
  )
  df_residuals_position_general
  
  
  
  # # # General Measures of Dispersion (residuals)
  df_residuals_dispersion_general <- data.frame(
    "range" = max(minibase_mod$residuals) - min(minibase_mod$residuals),
    "variance" = var(minibase_mod$residuals),
    "standard_deviation" = sd(minibase_mod$residuals),
    "standard_error" = sd(minibase_mod$residuals)/(sqrt(length(minibase_mod$residuals))),
    "n" = length(minibase_mod$residuals)
  )
  df_residuals_dispersion_general
  
  
  
  
  
  # # # # # Section 12 - Model estimators ----------------------------------------
  # # # Means for each level
  vector_est_mu_i <- df_vr_position_levels$mean
  vector_est_mu_i
  
  
  
  # # # Mean of means
  est_mu <- mean(vector_est_mu_i)
  vector_est_mu <- rep(est_mu, length(vector_est_mu_i))
  vector_est_mu
  
  
  
  # # # Tau efects
  vector_est_tau_i <- vector_est_mu_i - vector_est_mu
  vector_est_tau_i
  
  
  
  # # # Sum of tau efects
  sum_est_tau_i <- sum(vector_est_tau_i)
  sum_est_tau_i
  
  
  
  # # # Long model information on dataframe
  df_anova1way_model_long <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "est_mu" = vector_est_mu,
    "est_tau_i" = vector_est_tau_i
  )
  df_anova1way_model_long
  
  
  
  # # # Short model information on dataframe
  df_anova1way_model_short <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "est_mu_i" = vector_est_mu_i
  )
  df_anova1way_model_short
  
  
  
  
  
  # # # # # Section 13 - Special table to plots ----------------------------------
  
  # # # Table for plot001
  df_table_factor_plot001 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "min" = tapply(minibase[,1], minibase[,2], min),
    "max" = tapply(minibase[,1], minibase[,2], max),
    "sd" = tapply(minibase[,1], minibase[,2], sd),
    "var" = tapply(minibase[,1], minibase[,2], var)
  )
  
  df_table_factor_plot002 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "model_error_sd" = df_model_error$model_error_sd
  )
  df_table_factor_plot002["lower_limit"] <- df_table_factor_plot002$mean - df_table_factor_plot002$model_error_sd
  df_table_factor_plot002["upper_limmit"] <- df_table_factor_plot002$mean + df_table_factor_plot002$model_error_sd
  df_table_factor_plot002["color"] <- df_factor_info$color
  df_table_factor_plot002
  
  
  
  df_table_factor_plot003 <- data.frame(
    "order" = df_factor_info$order,
    "level" = df_factor_info$level,
    "n" = df_factor_info$n,
    "mean" = tapply(minibase[,1], minibase[,2], mean),
    "model_error_se" = df_model_error$model_error_se
  )
  df_table_factor_plot003["lower_limit"] <- df_table_factor_plot003$mean - df_table_factor_plot003$model_error_se
  df_table_factor_plot003["upper_limmit"] <- df_table_factor_plot003$mean + df_table_factor_plot003$model_error_se
  df_table_factor_plot003["color"] <- df_factor_info$color
  df_table_factor_plot003
  
  
  
  # # # Table for plot004
  df_table_factor_plot004 <- df_vr_position_levels
  df_table_factor_plot004["color"] <- df_factor_info$color
  
  # # # Table for plot005
  df_table_factor_plot005 <- df_table_factor_plot004
  
  # # # Table for plot006
  df_table_factor_plot006 <- df_table_factor_plot004
  
  
  df_table_factor_plot007 <- df_table_factor_plot003
  correct_pos_letters <- order(df_tukey_table$level)
  vector_letters <- df_tukey_table$group[correct_pos_letters]
  df_table_factor_plot007["group"] <- vector_letters
  
  # # # Table for plot006
  df_table_residuals_plot001 <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$residuals, minibase_mod[,2], length),
    "min" = tapply(minibase_mod$residuals, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$residuals, minibase_mod[,2], mean),
    "max" = tapply(minibase_mod$residuals, minibase_mod[,2], max),
    "var" = tapply(minibase_mod$residuals, minibase_mod[,2], var),
    "sd" = tapply(minibase_mod$residuals, minibase_mod[,2], sd),
    "color" = df_factor_info$color
  )
  df_table_residuals_plot001
  
  # # # Table for plot006
  df_table_residuals_plot002 <- df_table_residuals_plot001
  
  # # # Table for plot006
  df_table_residuals_plot003 <- df_table_residuals_plot001
  
  # # # Table for plot006
  df_table_residuals_plot004 <- data.frame(
    "variable" = "residuals",
    "n" = length(minibase_mod$residuals),
    "min" = min(minibase_mod$residuals),
    "mean" = mean(minibase_mod$residuals),
    "max" = max(minibase_mod$residuals),
    "var" = var(minibase_mod$residuals),
    "sd" = sd(minibase_mod$residuals),
    "model_error_var_MSE" = model_error_var_MSE,
    "model_error_sd" = model_error_sd
  )
  
  phrase_coment_errors <- "Model Error (MSE) "
  
  # # # Table for plot006
  df_table_residuals_plot005  <- df_table_residuals_plot004
  
  # # # Table for plot006
  df_table_residuals_plot006 <- data.frame(
    "order" = 1:nlevels(minibase_mod[,2]),
    "level" = levels(minibase_mod[,2]),
    "n" = tapply(minibase_mod$studres, minibase_mod[,2], length),
    "min" = tapply(minibase_mod$studres, minibase_mod[,2], min),
    "mean" = tapply(minibase_mod$studres, minibase_mod[,2], mean),
    "max" = tapply(minibase_mod$studres, minibase_mod[,2], max),
    "var" = tapply(minibase_mod$studres, minibase_mod[,2], var),
    "sd" = tapply(minibase_mod$studres, minibase_mod[,2], sd),
    "color" = df_factor_info$color
  )
  
  
  # # # Table for plot006
  df_table_residuals_plot007 <- df_table_residuals_plot006
  
  
  df_table_residuals_plot008 <- data.frame(
    "variable" = "studres",
    "n" = length(minibase_mod$studres),
    "min" = min(minibase_mod$studres),
    "mean" = mean(minibase_mod$studres),
    "max" = max(minibase_mod$studres),
    "var" = var(minibase_mod$studres),
    "sd" = sd(minibase_mod$studres)
  )
  
  
  df_table_residuals_plot009 <- df_table_residuals_plot008
  
  df_table_residuals_plot010 <- df_table_residuals_plot008
  
  #############################################################
  # # # Create a new plot...
  plot001_factor <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot001_factor <- plotly::add_trace(p = plot001_factor,
                                      type = "scatter",
                                      mode = "markers",
                                      x = minibase_mod[,var_name_factor],
                                      y = minibase_mod[,var_name_rv],
                                      color = minibase_mod[,var_name_factor],
                                      colors = df_factor_info$color,
                                      marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot001_factor <-   plotly::layout(p = plot001_factor,
                                     title = "Plot 001 - Scatterplot",
                                     font = list(size = 20),
                                     margin = list(t = 100))
  
  
  # # # Without zerolines
  plot001_factor <-   plotly::layout(p = plot001_factor,
                                     xaxis = list(zeroline = FALSE),
                                     yaxis = list(zeroline = FALSE))
  
  
  # # # Plot output
  plot001_factor
  
  
  ##############################################################################
  
  # # # Create a new plot...
  plot002_factor <- plot_ly()
  
  
  # # # Adding errors...
  plot002_factor <-   add_trace(p = plot002_factor,
                                type = "scatter",
                                mode = "markers",
                                x = df_table_factor_plot002$level,
                                y = df_table_factor_plot002$mean,
                                color = df_table_factor_plot002$level,
                                colors = df_table_factor_plot002$color,
                                marker = list(symbol = "line-ew-open",
                                              size = 50,
                                              opacity = 1,
                                              line = list(width = 5)),
                                error_y = list(type = "data", array = df_table_factor_plot002$model_error_sd)
  )
  
  
  # # # Title and settings...
  plot002_factor <- plotly::layout(p = plot002_factor,
                                   title = "Plot 002 - Mean and model standard deviation",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot002_factor <-plotly::layout(p = plot002_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  # # # Plot output
  plot002_factor
  
  ##############################################################################
  
  
  # # # Create a new plot...
  plot003_factor <- plotly::plot_ly()
  
  
  # # # Adding errors...
  plot003_factor <-   plotly::add_trace(p = plot003_factor,
                                        type = "scatter",
                                        mode = "markers",
                                        x = df_table_factor_plot003$level,
                                        y = df_table_factor_plot003$mean,
                                        color = df_table_factor_plot003$level,
                                        colors = df_table_factor_plot003$color,
                                        marker = list(symbol = "line-ew-open",
                                                      size = 50,
                                                      opacity = 1,
                                                      line = list(width = 5)),
                                        error_y = list(type = "data", array = df_table_factor_plot003$model_error_se)
  )
  
  
  # # # Title and settings...
  plot003_factor <- plotly::layout(p = plot003_factor,
                                   title = "Plot 003 - Mean y model standard error",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot003_factor <-plotly::layout(p = plot003_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  # # # Plot output
  plot003_factor
  
  ##############################################################################
  
  
  # # # New plotly...
  plot004_factor <- plotly::plot_ly()
  
  # # # Boxplot and info...
  plot004_factor <- plotly::add_trace(p = plot004_factor,
                                      type = "box",
                                      x = df_table_factor_plot004$level ,
                                      color = df_table_factor_plot004$level,
                                      colors = df_table_factor_plot004$color,
                                      lowerfence = df_table_factor_plot004$min,
                                      q1 = df_table_factor_plot004$Q1,
                                      median = df_table_factor_plot004$median,
                                      q3 = df_table_factor_plot004$Q3,
                                      upperfence = df_table_factor_plot004$max,
                                      boxmean = TRUE,
                                      boxpoints = FALSE,
                                      line = list(color = "black", width = 3)
  )
  
  # # # Title and settings...
  plot004_factor <- plotly::layout(p = plot004_factor,
                                   title = "Plot 004 - Boxplot and means",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot004_factor <- plotly::layout(p = plot004_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot004_anova...
  plot004_factor
  
  ##############################################################################
  
  all_levels <- levels(minibase_mod[,2])
  n_levels <- length(all_levels)
  all_color <- rainbow(length(all_levels))
  
  
  
  plot005_factor <- plot_ly()
  
  # Violinplot
  for (k in 1:n_levels){
    
    # Selected values
    selected_level <- all_levels[k]
    selected_color <- all_color[k]
    dt_filas <- minibase_mod[,2] == selected_level
    
    # Plotting selected violinplot
    plot005_factor <- plot005_factor %>%
      add_trace(x = minibase_mod[,2][dt_filas],
                y = minibase_mod[,1][dt_filas],
                type = "violin",
                name = paste0("violin", k),
                points = "all",
                marker = list(color = selected_color),
                line = list(color = selected_color),
                fillcolor = I(selected_color)
                
      )
    
    
  }
  
  
  
  
  # Boxplot
  plot005_factor <- plotly::add_trace(p = plot005_factor,
                                      type = "box",
                                      name = "boxplot",
                                      x = df_table_factor_plot005$level ,
                                      color = df_table_factor_plot005$level ,
                                      lowerfence = df_table_factor_plot005$min,
                                      q1 = df_table_factor_plot005$Q1,
                                      median = df_table_factor_plot005$median,
                                      q3 = df_table_factor_plot005$Q3,
                                      upperfence = df_table_factor_plot005$max,
                                      boxmean = TRUE,
                                      boxpoints = TRUE,
                                      fillcolor = df_table_factor_plot005$color,
                                      line = list(color = "black", width = 3),
                                      opacity = 0.5,
                                      width = 0.2)
  
  
  # # # Title and settings...
  plot005_factor <- plotly::layout(p = plot005_factor,
                                   title = "Plot 005 - Violinplot",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot005_factor <- plotly::layout(p = plot005_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot005_factor
  
  ##############################################################################
  
  
  #library(plotly)
  plot006_factor <- plotly::plot_ly()
  
  # Add traces
  plot006_factor <- plotly::add_trace(p = plot006_factor,
                                      type = "violin",
                                      y = minibase_mod$VR,
                                      x = minibase_mod$FACTOR,
                                      showlegend = TRUE,
                                      side = "positive",
                                      points = "all",
                                      name = "Violinplot",
                                      color = minibase_mod$FACTOR,
                                      colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot006_factor <- plotly::layout(p = plot006_factor,
                                   title = "Plot 006 - Scatterplot + Jitter +  Smoothed",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot006_factor <- plotly::layout(p = plot006_factor,
                                   xaxis = list(zeroline = FALSE),
                                   yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot006_factor
  
  ##############################################################################
  
  # # # Create a new plot...
  plot007_factor <- plotly::plot_ly()
  
  
  # # # Adding errors...
  plot007_factor <-   plotly::add_trace(p = plot007_factor,
                                        type = "scatter",
                                        mode = "markers",
                                        x = df_table_factor_plot007$level,
                                        y = df_table_factor_plot007$mean,
                                        color = df_table_factor_plot007$level,
                                        colors = df_table_factor_plot007$color,
                                        marker = list(symbol = "line-ew-open",
                                                      size = 50,
                                                      opacity = 1,
                                                      line = list(width = 5)),
                                        error_y = list(type = "data", array = df_table_factor_plot007$model_error_se)
  )
  
  
  
  plot007_factor <-  add_text(p = plot007_factor,
                              x = df_table_factor_plot007$level,
                              y = df_table_factor_plot007$mean,
                              text = df_table_factor_plot007$group, name = "Tukey Group",
                              size = 20)
  
  # # # Title and settings...
  plot007_factor <- plotly::layout(p = plot007_factor,
                                   title = "Plot 007 - Mean y model standard error",
                                   font = list(size = 20),
                                   margin = list(t = 100))
  
  # # # Without zerolines
  plot007_factor <-plotly::layout(p = plot007_factor,
                                  xaxis = list(zeroline = FALSE),
                                  yaxis = list(zeroline = FALSE))
  
  
  # # # Plot output
  plot007_factor
  
  #######----
  
  ####### DESDE ACAAAAAAAAAAAAAAAAAAAA
  # # # Create a new plot...
  plot001_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot001_residuals <- plotly::add_trace(p = plot001_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$FACTOR,
                                         y = minibase_mod$residuals,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot001_residuals <-   plotly::layout(p = plot001_residuals,
                                        title = "Plot 001 - Scatterplot - Residuals",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot001_residuals <-   plotly::layout(p = plot001_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot001_residuals
  
  
  
  
  
  #library(plotly)
  plot002_residuals <- plotly::plot_ly()
  
  # Add traces
  plot002_residuals <- plotly::add_trace(p = plot002_residuals,
                                         type = "violin",
                                         y = minibase_mod$residuals,
                                         x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = "Violinplot",
                                         color = minibase_mod$FACTOR,
                                         colors = df_table_residuals_plot002$color)
  
  
  
  # # # Title and settings...
  plot002_residuals <- plotly::layout(p = plot002_residuals,
                                      title = "Plot 002 - Residuals - Scatterplot + Jitter +  Smoothed",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot002_residuals <- plotly::layout(p = plot002_residuals,
                                      xaxis = list(zeroline = FALSE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot002_residuals
  
  
  
  
  
  
  plot003_residuals <- plotly::plot_ly()
  
  # Add traces
  plot003_residuals <- plotly::add_trace(p = plot003_residuals,
                                         type = "violin",
                                         x = minibase_mod$residuals,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = FALSE,
                                         #name = levels(minibase_mod$FACTOR)[minibase_mod$lvl_order_number],
                                         color = minibase_mod$FACTOR,
                                         colors = df_table_residuals_plot003$color)
  
  
  
  # # # Title and settings...
  plot003_residuals <- plotly::layout(p = plot003_residuals,
                                      title = "Plot 003 - Residuals - Scatterplot + Jitter +  Smoothed",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot003residuals <- plotly::layout(p = plot003_residuals,
                                     xaxis = list(zeroline = FALSE),
                                     yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot003_residuals
  
  
  
  
  
  plot004_residuals <- plotly::plot_ly()
  
  # Add traces
  plot004_residuals <- plotly::add_trace(p = plot004_residuals,
                                         type = "violin",
                                         x = minibase_mod$residuals,
                                         #x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = " ")#
  #color = minibase_mod$FACTOR,
  #colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot004_residuals <- plotly::layout(p = plot004_residuals,
                                      title = "Plot 004 - Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot004_residuals <- plotly::layout(p = plot004_residuals,
                                      xaxis = list(zeroline = TRUE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot004_residuals
  
  
  
  
  # - el 5
  qq_info <- EnvStats::qqPlot(x = minibase_mod$residuals, plot.it = F,
                              param.list = list(mean = mean(minibase_mod$residuals),
                                                sd = sd(minibase_mod$residuals)))
  
  cuantiles_teoricos <- qq_info$x
  cuantiles_observados <- qq_info$y
  
  #library(plotly)
  plot005_residuals <- plotly::plot_ly()
  
  # Crear el gráfico QQ plot
  plot005_residuals <-add_trace(p = plot005_residuals,
                                x = cuantiles_teoricos,
                                y = cuantiles_observados,
                                type = 'scatter', mode = 'markers',
                                marker = list(color = 'blue'),
                                name = "points")
  
  # Agregar la línea de identidad
  pendiente <- 1
  intercepto <- 0
  
  # Calcular las coordenadas de los extremos de la línea de identidad
  x_extremos <- range(cuantiles_teoricos)
  y_extremos <- pendiente * x_extremos + intercepto
  
  # Agregar la recta de identidad
  plot005_residuals <- add_trace(p = plot005_residuals,
                                 x = x_extremos,
                                 y = y_extremos,
                                 type = 'scatter',
                                 mode = 'lines',
                                 line = list(color = 'red'),
                                 name = "identity")
  
  
  # Establecer etiquetas de los ejes
  # plot007_residuals <- layout(p = plot007_residuals,
  #                             xaxis = list(title = 'Expected quantiles'),
  #                             yaxis = list(title = 'Observed quantiles'))
  
  plot005_residuals <- plotly::layout(p = plot005_residuals,
                                      title = "Plot 005 - QQ Plot Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  # Mostrar el gráfico
  plot005_residuals
  # - Fin el 5
  
  
  
  # # # Create a new plot...
  plot006_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot006_residuals <- plotly::add_trace(p = plot006_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$fitted.values,
                                         y = minibase_mod$residuals,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot006_residuals <-   plotly::layout(p = plot006_residuals,
                                        title = "Plot 006 - Scatterplot - Residuals vs Fitted.values",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot006_residuals <-   plotly::layout(p = plot006_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot006_residuals
  
  
  
  
  # # # Create a new plot...
  plot007_residuals <- plotly::plot_ly()
  
  # # # Plot001 - Scatter plot for VR and FACTOR on minibase_mod *****************
  plot007_residuals <- plotly::add_trace(p = plot007_residuals,
                                         type = "scatter",
                                         mode = "markers",
                                         x = minibase_mod$FACTOR,
                                         y = minibase_mod$studres,
                                         color = minibase_mod$FACTOR,
                                         colors = df_factor_info$color,
                                         marker = list(size = 15, opacity = 0.7))
  
  # # # Title and settings...
  plot007_residuals <-   plotly::layout(p = plot007_residuals,
                                        title = "Plot 007 - Scatterplot - Studentized Residuals",
                                        font = list(size = 20),
                                        margin = list(t = 100))
  
  
  # # # Without zerolines
  plot007_residuals <-   plotly::layout(p = plot007_residuals,
                                        xaxis = list(zeroline = FALSE),
                                        yaxis = list(zeroline = TRUE))
  
  
  # # # Plot output
  plot007_residuals
  
  
  
  
  
  
  
  #library(plotly)
  plot008_residuals <- plotly::plot_ly()
  
  # Add traces
  plot008_residuals <- plotly::add_trace(p = plot008_residuals,
                                         type = "violin",
                                         x = minibase_mod$studres,
                                         #x = minibase_mod$FACTOR,
                                         showlegend = TRUE,
                                         side = "positive",
                                         points = "all",
                                         name = " ")#
  #color = minibase_mod$FACTOR,
  #colors = df_table_factor_plot006$color)
  
  
  
  # # # Title and settings...
  plot008_residuals <- plotly::layout(p = plot008_residuals,
                                      title = "Plot 008 - Studentized Residuals",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  
  # # # Without zerolines...
  plot008_residuals <- plotly::layout(p = plot008_residuals,
                                      xaxis = list(zeroline = TRUE),
                                      yaxis = list(zeroline = FALSE))
  
  # # # Output plot003_anova...
  plot008_residuals
  
  
  
  
  # el 9
  
  x <- seq(-4, 4, length.out = 100)
  y <- dnorm(x, mean = 0, 1)
  #  x <- x*model_error_sd
  densidad_suavizada <- density(x, kernel = "gaussian", adjust = 0.5)
  hist_data_studres <- hist(minibase_mod$studres, plot = FALSE)
  hist_data_studres$"rel_frec" <- hist_data_studres$counts/sum(hist_data_studres$counts)
  
  densidad_studres <-  density(x = minibase_mod$studres, kernel = "gaussian", adjust =0.5)
  
  #library(plotly)
  plot009_residuals <- plotly::plot_ly()
  
  
  # plot005_residuals <- add_trace(p = plot005_residuals,
  #                                x = densidad_studres$x,
  #                                y = densidad_studres$y,
  #                                type = 'scatter',
  #                                mode = 'lines',
  #                                name = 'densidad_studres')
  
  plot009_residuals <- add_trace(p = plot009_residuals,
                                 x = x,
                                 y = y,
                                 type = 'scatter',
                                 mode = 'lines',
                                 name = 'Normal Standard')
  
  
  
  
  
  # # Add traces
  # plot005_residuals <- plotly::add_trace(p = plot005_residuals,
  #                                        type = "violin",
  #                                        x = minibase_mod$residuals,
  #                                        #x = minibase_mod$FACTOR,
  #                                        showlegend = TRUE,
  #                                        side = "positive",
  #                                        points = FALSE,
  #                                        name = "violinplot")#
  
  plot009_residuals <- plotly::add_trace(p = plot009_residuals,
                                         type = "bar",
                                         x = hist_data_studres$"mids",
                                         y = hist_data_studres$"density",
                                         name = "hist - studres")
  
  plot009_residuals <- plotly::layout(p = plot009_residuals,
                                      bargap = 0)
  
  plot009_residuals <- plotly::layout(p = plot009_residuals,
                                      title = "Plot 009 - Studres Distribution",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  plot009_residuals
  # fin el 9
  
  
  
  
  qq_info <- EnvStats::qqPlot(x = minibase_mod$studres, plot.it = F,
                              param.list = list(mean = 0,
                                                sd = 1))
  
  cuantiles_teoricos <- qq_info$x
  cuantiles_observados <- qq_info$y
  
  #library(plotly)
  plot010_residuals <- plotly::plot_ly()
  
  # Crear el gráfico QQ plot
  plot010_residuals <-add_trace(p = plot010_residuals,
                                x = cuantiles_teoricos,
                                y = cuantiles_observados,
                                type = 'scatter', mode = 'markers',
                                marker = list(color = 'blue'),
                                name = "points")
  
  # Agregar la línea de identidad
  pendiente <- 1
  intercepto <- 0
  
  # Calcular las coordenadas de los extremos de la línea de identidad
  x_extremos <- range(cuantiles_teoricos)
  y_extremos <- pendiente * x_extremos + intercepto
  
  # Agregar la recta de identidad
  plot010_residuals <- add_trace(p = plot010_residuals,
                                 x = x_extremos,
                                 y = y_extremos,
                                 type = 'scatter',
                                 mode = 'lines',
                                 line = list(color = 'red'),
                                 name = "identity")
  
  
  # Establecer etiquetas de los ejes
  # plot007_residuals <- layout(p = plot007_residuals,
  #                             xaxis = list(title = 'Expected quantiles'),
  #                             yaxis = list(title = 'Observed quantiles'))
  
  plot010_residuals <- plotly::layout(p = plot010_residuals,
                                      title = "Plot 010 - QQ Plot - studres",
                                      font = list(size = 20),
                                      margin = list(t = 100))
  
  # Mostrar el gráfico
  plot010_residuals

Data Analysis

Report (Automated Statistical Assistant)

  • Dataset file name: jaja
  • Response Variable: mpg
  • Factor: cyl
  • Alpha value: 0.05

The null hypothesis of normal distribution of residuals is not rejected.
The hypothesis of homogeneity of variances (homoscedasticity) is rejected.
Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test.
Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions.

1) Anova test

test aim variable p_value alpha_value Decision
Shapiro-Wilk test Normality residuals 0.5176650 0.05 Ho no rejected
Bartlett test Homogeneity residuals 0.0150452 0.05 Ho Rejected
Anova 1 way Mean mpg 0.0000000 0.05 Ho Rejected

This section details the statistical hypotheses for the tests used to validate the assumptions and primary conclusion of the One-Way Analysis of Variance (ANOVA).

One-Way ANOVA Hypotheses

The one-way ANOVA (Analysis of Variance) test determines if there are any statistically significant differences between the means of three or more independent groups.

Hypothesis Type Notation English Statement
Null Hypothesis \(H_0\) The means of all population groups are equal.
Alternative Hypothesis \(H_a\) At least one group mean is different from the others.

Null Hypothesis Equation: \[\mu_1 = \mu_2 = \mu_3 = \dots = \mu_k\]


Shapiro-Wilk Test (Residual Normality)

The Shapiro-Wilk test is used to check if the sample data (the ANOVA residuals) follows a normal distribution, which is a required assumption for the ANOVA test.

Hypothesis Type Notation English Statement
Null Hypothesis \(H_0\) The data (residuals) are drawn from a normally distributed population.
Alternative Hypothesis \(H_a\) The data (residuals) are not drawn from a normally distributed population.

Levene’s Test (Homogeneity of Variances)

Levene’s test is used to verify the assumption of homoscedasticity ((^2_1 = ^2_2 = = ^2_k)) across the different groups being compared in the ANOVA.

Hypothesis Type Notation English Statement
Null Hypothesis \(H_0\) The population variances of the groups are equal.
Alternative Hypothesis \(H_a\) The population variances of the groups are not equal.

1) Requeriment - Normaility test - Residuals

$test_residuals_normality

    Shapiro-Wilk normality test

data:  minibase_mod$residuals
W = 0.97065, p-value = 0.5177

2) Requeriment - Homogeneity test - Residuals

$test_residuals_homogeneity

    Bartlett test of homogeneity of variances

data:  residuals by cyl
Bartlett's K-squared = 8.3934, df = 2, p-value = 0.01505

3) Estimated variances - Residuals

$df_model_error
  order level  n model_error_var_MSE model_error_sd model_error_se raw_error_se
1     1     6  7            10.38837       3.223099      1.2182168    1.2182168
2     2     4 11            10.38837       3.223099      0.9718008    0.9718008
3     3     8 14            10.38837       3.223099      0.8614094    0.8614094

$df_raw_error
  order level  n raw_error_var raw_error_sd
1     1     6  7      2.112857     1.453567
2     2     4 11     20.338545     4.509828
3     3     8 14      6.553846     2.560048

$phrase_info_errors
[1] "Anova and Tukey use MSE from model."                                                              
[2] "Bartlett use variance from raw error on each level."                                              
[3] "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE."
$df_table_factor_plot001
  order level  n     mean  min  max       sd       var
6     1     6  7 19.74286 17.8 21.4 1.453567  2.112857
4     2     4 11 26.66364 21.4 33.9 4.509828 20.338545
8     3     8 14 15.10000 10.4 19.2 2.560048  6.553846

$df_table_factor_plot002
  order level  n     mean model_error_sd lower_limit upper_limmit   color
6     1     6  7 19.74286       3.223099    16.51976     22.96596 #FF0000
4     2     4 11 26.66364       3.223099    23.44054     29.88674 #00FF00
8     3     8 14 15.10000       3.223099    11.87690     18.32310 #0000FF

$df_table_factor_plot003
  order level  n     mean model_error_se lower_limit upper_limmit   color
6     1     6  7 19.74286      1.2182168    18.52464     20.96107 #FF0000
4     2     4 11 26.66364      0.9718008    25.69184     27.63544 #00FF00
8     3     8 14 15.10000      0.8614094    14.23859     15.96141 #0000FF

$df_table_factor_plot004
  order level  min     mean    Q1 median    Q3  max  n   color
6     1     6 17.8 19.74286 18.65   19.7 21.00 21.4  7 #FF0000
4     2     4 21.4 26.66364 22.80   26.0 30.40 33.9 11 #00FF00
8     3     8 10.4 15.10000 14.40   15.2 16.25 19.2 14 #0000FF

$df_table_factor_plot005
  order level  min     mean    Q1 median    Q3  max  n   color
6     1     6 17.8 19.74286 18.65   19.7 21.00 21.4  7 #FF0000
4     2     4 21.4 26.66364 22.80   26.0 30.40 33.9 11 #00FF00
8     3     8 10.4 15.10000 14.40   15.2 16.25 19.2 14 #0000FF

$df_table_factor_plot006
  order level  min     mean    Q1 median    Q3  max  n   color
6     1     6 17.8 19.74286 18.65   19.7 21.00 21.4  7 #FF0000
4     2     4 21.4 26.66364 22.80   26.0 30.40 33.9 11 #00FF00
8     3     8 10.4 15.10000 14.40   15.2 16.25 19.2 14 #0000FF

  order level  n     mean model_error_se lower_limit upper_limmit   color group
6     1     6  7 19.74286      1.2182168    18.52464     20.96107 #FF0000     a
4     2     4 11 26.66364      0.9718008    25.69184     27.63544 #00FF00     b
8     3     8 14 15.10000      0.8614094    14.23859     15.96141 #0000FF     c

$df_table_residuals_plot001
  order level  n       min          mean      max       var       sd   color
6     1     6  7 -1.942857  3.410101e-16 1.657143  2.112857 1.453567 #FF0000
4     2     4 11 -5.263636 -1.918446e-16 7.236364 20.338545 4.509828 #00FF00
8     3     8 14 -4.700000 -3.191891e-16 4.100000  6.553846 2.560048 #0000FF

$df_table_residuals_plot002
  order level  n       min          mean      max       var       sd   color
6     1     6  7 -1.942857  3.410101e-16 1.657143  2.112857 1.453567 #FF0000
4     2     4 11 -5.263636 -1.918446e-16 7.236364 20.338545 4.509828 #00FF00
8     3     8 14 -4.700000 -3.191891e-16 4.100000  6.553846 2.560048 #0000FF

$df_table_residuals_plot003
  order level  n       min          mean      max       var       sd   color
6     1     6  7 -1.942857  3.410101e-16 1.657143  2.112857 1.453567 #FF0000
4     2     4 11 -5.263636 -1.918446e-16 7.236364 20.338545 4.509828 #00FF00
8     3     8 14 -4.700000 -3.191891e-16 4.100000  6.553846 2.560048 #0000FF

$df_table_residuals_plot004
   variable  n       min          mean      max      var       sd
1 residuals 32 -5.263636 -1.309716e-16 7.236364 9.718148 3.117394
  model_error_var_MSE model_error_sd
1            10.38837       3.223099

$df_table_residuals_plot005
   variable  n       min          mean      max      var       sd
1 residuals 32 -5.263636 -1.309716e-16 7.236364 9.718148 3.117394
  model_error_var_MSE model_error_sd
1            10.38837       3.223099

$df_table_residuals_plot006
  order level  n        min          mean       max       var        sd   color
6     1     6  7 -0.6027917  9.813361e-17 0.5141459 0.2033869 0.4509843 #FF0000
4     2     4 11 -1.6330981 -6.559423e-17 2.2451573 1.9578196 1.3992211 #00FF00
8     3     8 14 -1.4582240 -9.020562e-17 1.2720678 0.6308833 0.7942816 #0000FF

$df_table_residuals_plot007
  order level  n        min          mean       max       var        sd   color
6     1     6  7 -0.6027917  9.813361e-17 0.5141459 0.2033869 0.4509843 #FF0000
4     2     4 11 -1.6330981 -6.559423e-17 2.2451573 1.9578196 1.3992211 #00FF00
8     3     8 14 -1.4582240 -9.020562e-17 1.2720678 0.6308833 0.7942816 #0000FF

$df_table_residuals_plot008
  variable  n       min          mean      max       var        sd
1  studres 32 -1.633098 -4.054238e-17 2.245157 0.9354839 0.9672042

$df_table_residuals_plot009
  variable  n       min          mean      max       var        sd
1  studres 32 -1.633098 -4.054238e-17 2.245157 0.9354839 0.9672042

$df_table_residuals_plot010
  variable  n       min          mean      max       var        sd
1  studres 32 -1.633098 -4.054238e-17 2.245157 0.9354839 0.9672042

Stock

El script realiza todo lo relacionado al análisis de datos enmarcado en un diseño a un factor de efecto fijos completamente aleatorizado.

El scriopt realiza:
- 4 test estadísticos.
- 15 gráficos.
- 20 tablas.

Cuando se habla de realizar el un Anova, en realidad no es solo realizar el Test de Análisis de la varianza, sino el test en si mismo con todos los anexos y accesorios indispensables.

Un poco más en detalle, el listado es el siguiente:
- Test ANOVA a 1 Factor.
- Un test de comparación múltiple (Test de Tukey).
- Test de Normalidad Shapiro-Wilks para los residuos.
- Test de Homogeneidad de Varianzas de Bartlett para los residuos.
- Estadísticas descriptivas para una variable cuantitativa particionada por lo niveles del factor.
- Estadísticas descriptivas aplicados a los residuos en su totalidad y aplicados a los residuos particionados por el factor.

Rscience aborda toda la complejidad relacionada a un análisis de la varianza a 1 factor ya sea dentro del test Anova, su test de comparaciones múltibple, los test estadísticos para sus requisitos y para la generación de todas las tablas y gráficos asociados.

El script realiza un manejo explícito de los datos faltantes, mendiante la utilización de la función na.omit() previo al ingreso de los datos a las funciones estadísticas. De esa manera se maneja todo el tiempo solo a las filas de datos que efectivamente ingresan al test estadístico, a tablas y a gráficos.

El script contiene todos los criterios estadísticos relacionados a la toma de decisiones de para ANova a 1 Factor, otorgando al usuario frases directas y explícitas sobre cómo interpretar sus datos.

Rscience es tajante, directo y claro en cuanto a la toma de decisiones. Deben tomarse las frases otorgadas como frases que orientan en una buena dirección y sentido a un operador. Rscience solo facilita interpretaciones estadísticas, luego el operador deberá terminar de entender la situación bajo análisis para poder dar interpretaciones de esos resultados en su marco de trabajo o estudio.

Algunos detalles particularmente importantes del script 003 para ANova a 1 Factor, Efectos Fijos, Modelo Lineal General, son los gráficos generados para los niveles del factor utilizando la media de cada nivel y el error estándard obtenido del modelo y no del error estándard obtenido de los datos. La bibliografía estadística indica que esa es la forma correcta de plotear gráficamente lo que tanto anova como el test de comparaciones están realizando analíticamente. Sin embargo en ninguno de los softwares clásicos de estadística ha sido posible nunca obtener dicho gráfico.

También es un gran alivio para el operador poder elegir el orden de presentación de los niveles del factor, ya que por defecto todos los softwares realizan las tablas y los gráficos con el orden alfabético sin la posibilidad de poder realizar este tipo de cambios.

FAQs

En esta sección se dan un conjunto de preguntas tanto desde el punto de vista estadístico, informático o del diseño. Las preguntas y respuestas están mezcladas en su temática.

1) ¿Es mejor el test de Anova que el test t?

2) ¿Es lo mismo variable numérica que variable cuantitativa?.

Rta: Es un error muy común. No. No es lo mismo. Una variable es numérica desde el punto de vista informático si la variable es representada con símbolos numéricos dentro del dataset. Si yo tengo una variable que tiene categorías representada con números entonces la variable no es numérica. Una variable cualitativa ordinal representada con números, como por ejemplo escala de dolor, índice de felicidad, cobertura de suelo determinada a ojo del operador, cuanto me gusta el helado de chocolate del 1 al 10, no son variable cuantitativas.

Si la variable respuesta es cualitativa nominal representada con números o cualitativa ordinal representada con números, en ambos casos la variable es numérica pero no es cuantitativa. En esos casos ingresar al test de Anova es un error grave. El test de Anova es solo para variables cuantitativas. Si se tiene un factor y una variable respuesta cualitativa nominal representada con números, debiera tal vez realizarse un test Chi Cuadrado. Si se tiene un factor y una variable respuesta cualitativa ordinal representada con números, debería utilizarse herramientas estadísticas de Distribución Libre como es el test de Kruskal-Wallis.

  1. ¿Es mejor el test de Anova a 1 fActor que el Test de Kruskal-Wallis? Algo es mejor o peor en un contexto. La idea general de la bibliografía es que si hay un contexto en que podemos poner a prueba una característica de nuestra población con diferentes herramienta estadística, debemos elegir la más potente. Esto quiere decir que de las herramientas estadísticas disponibles y válidas para analizar mis datos, yo debo elegir aquella que tiene más chances de rechazar Ho. En esta frase recalco la palabra válidas.

Si se tiene un factor con una variable respuesta, y la variable respuesta es cuantitativa, y al ingresar al test de Anova los residuos cumplen los requisitos de homogeneidad y normalidad, en ese caso yo tengo para elegir tanto Anova a 1 Factor como el Test de Kruskal-Wallis. En este contexto es mejor Anova que el test de Kruskal-Wallis.

Si se tiene un factor con una variable respuesta, y la variable respuesta es cuantitativa, y al ingresar al test de Anova los residuos no cumplen todos los requisitos de homogeneidad y normalidad, en ese caso Anova ya no es válido como opción. Mi única opción en este contexto es Kruskal-Wallis.

Si se tiene un factor con una variable respuesta, y la variable respuesta es cualitativa ordinal representada con números, Anova ya no es si quiera es una opción ya que no es válido trabajar dentro de anova con datos cualitativos (aunque estén representados con símbolos números). Mi única opción en este contexto es Kruskal-Wallis.

Si se tiene un factor con una variable respuesta, y la variable respuesta es cualitativa nominal representada con números, en este contexto ni Anova ni Kruskal-Wallis son las herramientas correctas para analizar esos datos.

3) ¿Es lo mismo la media de las medias que la media de toda la variable respuesta?

Rta: No. Y es también un error común. Hablemos un poco antes de la media y la mediana. Aunque la media y la mediana pueden tener el mismo valor numérico, no son lo mismo. Que ocasionalmente puedan adquirir ambas el mismo valor no las hace el mismo concepto. La media y la mediana coinciden en cualquier distribución simétrica. Cuando se tienen distribuciones simétricas como es la distribuciòn normal, entonces coinciden media y mediana.

Algo similar ocurre con la media de las medias y la media de toda la variable respuesta. La media de las medias será igual a la media de toda la variable respuesta cuando el “n” de todos los niveles del factor sea el mismo. Solo en ese caso coinciden. En Anova tenemos Mu, Tau y Mui, y entre otras cosas ocurre que la suma de los tau es cero. Eso solo ocurre si mu es estimada con la media de las medias. En Anova a 1 Factor, Mu, que se le suele decir la Media General, o la Esperanza General, es la media de las medias. Puede usted fácilmente corroborar esto calculando los valores Tau y la suma de los valores Tau para un caso y para el otro.

df_selected_vars
  order var_name var_number var_letter var_role doble_reference
1     1      mpg          1          A       RV         RV(mpg)
2     2      cyl          2          B   FACTOR     FACTOR(cyl)
df_show_n
      object n_col n_row
1 my_dataset    11    32
2   minibase     2    32
df_factor_info
  order level  n     mean   color
1     1     6  7 19.74286 #FF0000
2     2     4 11 26.66364 #00FF00
3     3     8 14 15.10000 #0000FF
check_unbalanced_reps
[1] TRUE
phrase_selected_check_unbalanced
[1] "The design is unbalanced in repetitions. A correction is applied to the Tukey test."
df_table_anova
            Df   Sum Sq   Mean Sq  F value       Pr(>F)
cyl          2 824.7846 412.39230 39.69752 4.978919e-09
Residuals   29 301.2626  10.38837       NA           NA
df_table_anova
            Df   Sum Sq   Mean Sq  F value       Pr(>F)
cyl          2 824.7846 412.39230 39.69752 4.978919e-09
Residuals   29 301.2626  10.38837       NA           NA
test_residuals_normality

    Shapiro-Wilk normality test

data:  minibase_mod$residuals
W = 0.97065, p-value = 0.5177
test_residuals_homogeneity

    Bartlett test of homogeneity of variances

data:  residuals by cyl
Bartlett's K-squared = 8.3934, df = 2, p-value = 0.01505
phrase_info_errors
[1] "Anova and Tukey use MSE from model."                                                              
[2] "Bartlett use variance from raw error on each level."                                              
[3] "Only if there is homogeneity from raw error variances then is a good idea take desition from MSE."
df_summary_anova
               test         aim  variable      p_value alpha_value
1 Shapiro-Wilk test   Normality residuals 5.176650e-01        0.05
2     Bartlett test Homogeneity residuals 1.504518e-02        0.05
3       Anova 1 way        Mean       mpg 4.978919e-09        0.05
        Decision
1 Ho no rejected
2    Ho Rejected
3    Ho Rejected
phrase_shapiro_selected 
[1] "The null hypothesis of normal distribution of residuals is not rejected."
phrase_bartlett_selected
[1] "The hypothesis of homogeneity of variances (homoscedasticity) is rejected."
phrase_requeriments_selected 
[1] "Not all model assumptions are met, so it is NOT valid to draw conclusions from the ANOVA test."
phrase_anova_selected
[1] "Regardless of the p-value obtained in ANOVA, it is not valid to draw conclusions."
df_tukey_table
  level     mean group
1     4 26.66364     a
2     6 19.74286     b
3     8 15.10000     c

Theory

Bibliografia

Reproducibility, Transparency and Open Access

The fundamental pillars of Rscience are Reproducibility, Transparency, and Open Access.
We strongly believe that adherence to these principles helps make science better, more reliable, and more objective.

This section provides a complete digital footprint of the computational environment used to generate this report.

By documenting the exact R version, Rscience version, the operating system configuration, and the precise list of installed packages, we guarantee maximum transparency and portability of the analysis, ensuring that the results presented can be accurately validated and replicated by any third party.

To achieve true, byte-for-byte reproducibility, a set of explicit commands leveraging the rix library is provided. These statements enable users to install the exact version of R and Rscience used when generating this report, along with every required dependency package locked to its specific version. This process eliminates environmental variance and ensures the analysis can be faithfully recreated across different systems.

R - Session Info

Shows the currently loaded packages and R environment configuration for this specific analysis. This includes R version, locale settings, and attached packages that are actively being used in this report.

sessionInfo()
R version 4.4.3 (2025-02-28)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=it_IT.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=it_IT.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=it_IT.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] rlang_1.1.6       kableExtra_1.4.0  dplyr_1.1.4       plotly_4.11.0    
 [5] ggplot2_4.0.0     agricolae_1.3-7   htmlwidgets_1.6.4 stringr_1.5.2    
 [9] knitr_1.50        htmltools_0.5.8.1 fontawesome_0.5.3

loaded via a namespace (and not attached):
 [1] generics_0.1.4     tidyr_1.3.1        xml2_1.4.0         EnvStats_3.1.0    
 [5] stringi_1.8.7      lattice_0.22-5     digest_0.6.37      magrittr_2.0.3    
 [9] evaluate_1.0.3     grid_4.4.3         RColorBrewer_1.1-3 fastmap_1.2.0     
[13] jsonlite_2.0.0     zip_2.3.3          httr_1.4.7         purrr_1.1.0       
[17] crosstalk_1.2.2    viridisLite_0.4.2  scales_1.4.0       textshaping_1.0.4 
[21] lazyeval_0.2.2     cli_3.6.5          withr_3.0.2        yaml_2.3.10       
[25] tools_4.4.3        AlgDesign_1.2.1.2  vctrs_0.6.5        R6_2.6.1          
[29] lifecycle_1.0.4    MASS_7.3-65        cluster_2.1.8.1    pkgconfig_2.0.3   
[33] pillar_1.11.1      openxlsx_4.2.8     gtable_0.3.6       glue_1.8.0        
[37] data.table_1.17.8  Rcpp_1.1.0         systemfonts_1.3.1  xfun_0.53         
[41] tibble_3.3.0       tidyselect_1.2.1   rstudioapi_0.17.1  farver_2.1.2      
[45] nlme_3.1-168       rmarkdown_2.30     svglite_2.2.1      compiler_4.4.3    
[49] S7_0.2.0          

R Version

R.version
Parameters values
platform x86_64-pc-linux-gnu
arch x86_64
os linux-gnu
system x86_64, linux-gnu
status
major 4
minor 4.3
year 2025
month 02
day 28
svn rev 87843
language R
version.string R version 4.4.3 (2025-02-28)
nickname Trophy Case

R - Installed Libraries

Lists all packages installed in the R environment, providing a complete inventory of available tools. This comprehensive list helps recreate the computational environment for reproducibility purposes.

# All installed package on system
installed.packages()

R - System Info

Displays system-level information including user details, machine name, and operating system specifics. This context is valuable for debugging and understanding the execution environment, particularly in multi-user or server-based scenarios.

Sys.info()
Parameter Value
sysname Linux
release 6.8.0-85-generic
version #85~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Sep 19 16:18:59 UTC 2
nodename david
machine x86_64
login david
user david
effective_user david